Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 3
-rw-r--r--  arch/alpha/kernel/pci-sysfs.c | 2
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/mm/fault-armv.c | 3
-rw-r--r--  arch/arm/mm/fault.c | 1
-rw-r--r--  arch/arm/mm/flush.c | 3
-rw-r--r--  arch/arm64/Kconfig | 4
-rw-r--r--  arch/avr32/mm/fault.c | 1
-rw-r--r--  arch/blackfin/Kconfig | 1
-rw-r--r--  arch/cris/Kconfig | 1
-rw-r--r--  arch/cris/mm/fault.c | 1
-rw-r--r--  arch/frv/Kconfig | 2
-rw-r--r--  arch/h8300/Kconfig | 1
-rw-r--r--  arch/hexagon/mm/vm_fault.c | 1
-rw-r--r--  arch/ia64/include/asm/hugetlb.h | 4
-rw-r--r--  arch/ia64/kernel/perfmon.c | 2
-rw-r--r--  arch/ia64/mm/fault.c | 1
-rw-r--r--  arch/ia64/mm/init.c | 4
-rw-r--r--  arch/m32r/Kconfig | 1
-rw-r--r--  arch/m68k/Kconfig | 2
-rw-r--r--  arch/m68k/mm/fault.c | 1
-rw-r--r--  arch/microblaze/Kconfig | 1
-rw-r--r--  arch/microblaze/include/asm/atomic.h | 1
-rw-r--r--  arch/microblaze/mm/fault.c | 1
-rw-r--r--  arch/mips/Kconfig | 1
-rw-r--r--  arch/mips/include/asm/hugetlb.h | 4
-rw-r--r--  arch/mips/mm/fault.c | 1
-rw-r--r--  arch/openrisc/mm/fault.c | 1
-rw-r--r--  arch/parisc/kernel/cache.c | 3
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 1
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 1
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c | 15
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 13
-rw-r--r--  arch/s390/Kconfig | 3
-rw-r--r--  arch/s390/include/asm/hugetlb.h | 19
-rw-r--r--  arch/s390/include/asm/pgtable.h | 210
-rw-r--r--  arch/s390/include/asm/setup.h | 5
-rw-r--r--  arch/s390/include/asm/tlb.h | 1
-rw-r--r--  arch/s390/kernel/early.c | 2
-rw-r--r--  arch/s390/mm/fault.c | 1
-rw-r--r--  arch/s390/mm/gup.c | 11
-rw-r--r--  arch/s390/mm/pgtable.c | 108
-rw-r--r--  arch/sh/Kconfig | 3
-rw-r--r--  arch/sh/include/asm/hugetlb.h | 6
-rw-r--r--  arch/sh/mm/fault.c | 1
-rw-r--r--  arch/sparc/Kconfig | 41
-rw-r--r--  arch/sparc/include/asm/hugetlb.h | 9
-rw-r--r--  arch/sparc/include/asm/mmu_64.h | 19
-rw-r--r--  arch/sparc/include/asm/mmu_context_64.h | 2
-rw-r--r--  arch/sparc/include/asm/page_64.h | 21
-rw-r--r--  arch/sparc/include/asm/pgalloc_64.h | 56
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 253
-rw-r--r--  arch/sparc/include/asm/tsb.h | 106
-rw-r--r--  arch/sparc/kernel/pci.c | 2
-rw-r--r--  arch/sparc/kernel/sun4v_tlb_miss.S | 2
-rw-r--r--  arch/sparc/kernel/tsb.S | 9
-rw-r--r--  arch/sparc/mm/fault_32.c | 1
-rw-r--r--  arch/sparc/mm/fault_64.c | 5
-rw-r--r--  arch/sparc/mm/hugetlbpage.c | 50
-rw-r--r--  arch/sparc/mm/init_64.c | 314
-rw-r--r--  arch/sparc/mm/tlb.c | 118
-rw-r--r--  arch/sparc/mm/tsb.c | 40
-rw-r--r--  arch/tile/Kconfig | 3
-rw-r--r--  arch/tile/include/asm/hugetlb.h | 4
-rw-r--r--  arch/tile/mm/elf.c | 19
-rw-r--r--  arch/tile/mm/fault.c | 1
-rw-r--r--  arch/um/Kconfig.common | 1
-rw-r--r--  arch/um/kernel/trap.c | 1
-rw-r--r--  arch/unicore32/kernel/process.c | 2
-rw-r--r--  arch/x86/Kconfig | 5
-rw-r--r--  arch/x86/include/asm/atomic.h | 24
-rw-r--r--  arch/x86/include/asm/hugetlb.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable.h | 11
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 1
-rw-r--r--  arch/x86/mm/fault.c | 1
-rw-r--r--  arch/x86/mm/hugetlbpage.c | 3
-rw-r--r--  arch/x86/mm/pat.c | 87
-rw-r--r--  arch/x86/mm/pat_rbtree.c | 34
-rw-r--r--  arch/x86/xen/mmu.c | 3
-rw-r--r--  arch/xtensa/mm/fault.c | 1
84 files changed, 1283 insertions(+), 433 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index a62965d057f6..550cce4dd648 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -313,4 +313,7 @@ config HAVE_IRQ_TIME_ACCOUNTING
 	  Archs need to ensure they use a high enough resolution clock to
 	  support irq time accounting and then call enable_sched_clock_irqtime().
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 53649c7d0068..b51f7b4818cd 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose,
 	base = sparse ? hose->sparse_io_base : hose->dense_io_base;
 
 	vma->vm_pgoff += base >> PAGE_SHIFT;
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 				  vma->vm_end - vma->vm_start,
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6d2f7f5c0036..2867a7742306 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,6 +25,7 @@ config ARM
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
@@ -39,6 +40,7 @@ config ARM
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HARDIRQS_SW_RESEND
 	select CPU_PM if (SUSPEND || CPU_IDLE)
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7d..2a5907b5c8d2 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	pgoff_t pgoff;
 	int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 	 * cache coherency.
 	 */
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
 		 * Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd83450227..5dbf13f954f6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5f..1c8f7f564175 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 {
 	struct mm_struct *mm = current->active_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long offset;
 
 		/*
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 767ba5685454..7ff68c946073 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,8 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DEBUG_BUGVERBOSE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
 	select HAVE_GENERIC_DMA_COHERENT
@@ -26,6 +28,7 @@ config ARM64
 	select PERF_USE_VMALLOC
 	select RTC_LIB
 	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
@@ -193,6 +196,7 @@ config COMPAT
 	bool "Kernel support for 32-bit EL0"
 	depends on !ARM64_64K_PAGES
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	help
 	  This option enables support for a 32-bit EL0 running under a 64-bit
 	  kernel at EL1. AArch32-specific components such as system calls,
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b92e60958617..b2f2d2d66849 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -152,6 +152,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would have
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 99224c4eb86b..ccd9193932b2 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -33,6 +33,7 @@ config BLACKFIN
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 72bd5ae50a89..a118163b04ee 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -42,6 +42,7 @@ config CRIS
 	select HAVE_IDE
 	select GENERIC_ATOMIC64
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 45fd542cf173..73312ab6c696 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -186,6 +186,7 @@ retry:
 		tsk->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/*
 		 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 971c0a19facb..9d262645f667 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -5,8 +5,10 @@ config FRV
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select HAVE_UID16
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
+	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09ce..90462eb23d02 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -3,6 +3,7 @@ config H8300
 	default y
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 06695cc4fe58..513b74cb397e 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -113,6 +113,7 @@ good_area:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index da55c63728e0..94eaa5bd5d0c 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -77,4 +77,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f388b4e18a37..ea39eba61ef5 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 */
 	vma->vm_mm = mm;
 	vma->vm_file = get_file(filp);
-	vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
 
 	/*
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 8443daf4f515..6cf0341f978e 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -184,6 +184,7 @@ retry:
 		current->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/* No need to up_read(&mm->mmap_sem) as we would
 		 * have already released it in __lock_page_or_retry
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0eab454867a2..acd5b68e8871 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -138,7 +138,8 @@ ia64_init_addr_space (void)
 		vma->vm_mm = current->mm;
 		vma->vm_end = PAGE_SIZE;
 		vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
-		vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+		vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
+				VM_DONTEXPAND | VM_DONTDUMP;
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
@@ -636,6 +637,7 @@ mem_init (void)
 
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
 
+	reset_zone_present_pages();
 	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
 			totalram_pages += free_all_bootmem_node(pgdat);
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 49498bbb9616..e875fc3ce9cb 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -8,6 +8,7 @@ config M32R
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dce..dae1e7e16a37 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,9 +3,11 @@ config M68K
 	default y
 	select HAVE_IDE
 	select HAVE_AOUT if MMU
+	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
+	select HAVE_UID16
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select GENERIC_CPU_DEVICES
 	select GENERIC_STRNCPY_FROM_USER if MMU
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index aeebbb7b30f0..a563727806bf 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -170,6 +170,7 @@ good_area:
 		/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 		 * of starvation. */
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/*
 		 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6133bed2b855..53fd94ab60f0 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -16,6 +16,7 @@ config MICROBLAZE
 	select OF
 	select OF_EARLY_FLATTREE
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select HAVE_DEBUG_KMEMLEAK
 	select IRQ_DOMAIN
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index 472d8bf726df..42ac382a09da 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -22,5 +22,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 	return res;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #endif /* _ASM_MICROBLAZE_ATOMIC_H */
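
A note on the hunk above (the powerpc diff further down does the same): defining atomic_dec_if_positive as a macro that expands to itself is how an architecture advertises that it already provides the function, so the generic atomic header can guard its portable fallback with #ifndef atomic_dec_if_positive. A standalone sketch of that detection idiom follows; every name in it is invented for the demo, and the body is deliberately non-atomic:

/* dec_if_positive() and this harness are made up; only the
 * "#define f f" detection mechanism mirrors the patch. */
#include <stdio.h>

/* "arch" header: provides its own implementation and defines the
 * macro to itself so generic code can detect that it exists. */
static inline int dec_if_positive(int *v)
{
	int old = *v;

	if (old > 0)
		(*v)--;
	return old - 1;		/* negative result means "not decremented" */
}
#define dec_if_positive dec_if_positive

/* "generic" header: the fallback compiles only when no arch
 * version was detected, so it is skipped here. */
#ifndef dec_if_positive
static inline int dec_if_positive(int *v)
{
	int old = *v;

	if (old > 0)
		(*v)--;
	return old - 1;
}
#endif

int main(void)
{
	int v = 1;

	printf("%d\n", dec_if_positive(&v));	/* 0: decremented 1 -> 0 */
	printf("%d\n", dec_if_positive(&v));	/* -1: 0 left untouched */
	return 0;
}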
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index eb365d6795fa..714b35a9c4f7 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -233,6 +233,7 @@ good_area:
 		current->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/*
 		 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4cd538b42a3f..35453eaeffb5 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -17,6 +17,7 @@ config MIPS
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DEBUG_KMEMLEAK
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select RTC_LIB if !MACH_LOONGSON
 	select GENERIC_ATOMIC64 if !64BIT
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 58d36889f09b..bd94946a18f3 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -112,4 +112,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 7a19957735e9..ddcec1e1a0cd 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,6 +171,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 40f850e9766c..e2bfafce66c5 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -183,6 +183,7 @@ good_area:
 		tsk->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/* No need to up_read(&mm->mmap_sem) as we would
 		 * have already released it in __lock_page_or_retry
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 9d181890a7e3..48e16dc20102 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
 	pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
 	 * to flush one address here for them all to become coherent */
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4ce0be32d153..df7edb887a04 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -99,6 +99,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
@@ -113,6 +114,7 @@ config PPC
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_OPROFILE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index da29032ae38f..e3b1d41c89be 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -268,6 +268,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 	return t;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dfdb95bc59a5..62e11a32c4c2 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -151,6 +151,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #else /* ! CONFIG_HUGETLB_PAGE */
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 				      unsigned long vmaddr)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 83e929e66f9d..721d4603a235 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = {
 
 static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &kvm_rma_vm_ops;
 	return 0;
 }
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5495ebe983a2..0a6b28336eb0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -451,6 +451,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 642fca137ccb..28f1af2db1f5 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -304,7 +304,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 	return cookie;
 }
 
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
  * which corresponds loosely to "application name". Also, determine
  * the offset for the SPU ELF object. If computed offset is
  * non-zero, it implies an embedded SPU object; otherwise, it's a
@@ -321,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 {
 	unsigned long app_cookie = 0;
 	unsigned int my_offset = 0;
-	struct file *app = NULL;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = spu->mm;
 
@@ -330,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 
 	down_read(&mm->mmap_sem);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (!vma->vm_file)
-			continue;
-		if (!(vma->vm_flags & VM_EXECUTABLE))
-			continue;
-		app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
+	if (mm->exe_file) {
+		app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
 		pr_debug("got dcookie for %s\n",
-			 vma->vm_file->f_dentry->d_name.name);
-		app = vma->vm_file;
-		break;
+			 mm->exe_file->f_dentry->d_name.name);
 	}
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 11d8e0544ac0..dc0a035e63bb 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -77,7 +77,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 {
 	unsigned long start, start_pfn;
 	struct zone *zone;
-	int ret;
+	int i, ret;
+	int sections_to_remove;
 
 	start_pfn = base >> PAGE_SHIFT;
 
@@ -97,9 +98,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	 * to sysfs "state" file and we can't remove sysfs entries
 	 * while writing to it. So we have to defer it to here.
 	 */
-	ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
-	if (ret)
-		return ret;
+	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+	for (i = 0; i < sections_to_remove; i++) {
+		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
+		ret = __remove_pages(zone, pfn, PAGES_PER_SECTION);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Update memory regions for memory remove
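
For a sense of the loop bounds introduced above: assuming 4 KiB pages and 16 MiB memory sections (typical powerpc values, not taken from the patch), removing one 256 MiB memblock means (256 MiB / 4 KiB) / 4096 = 16 calls to __remove_pages(), one per section. A minimal sketch of that arithmetic:

/* Illustrative only: PAGE_SHIFT, the section size, and the memblock
 * size are assumptions for the demo, not values carried by the patch. */
#include <stdio.h>

#define PAGE_SHIFT		12			/* 4 KiB pages */
#define SECTION_SIZE_BITS	24			/* 16 MiB sections */
#define PAGES_PER_SECTION	(1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))

int main(void)
{
	unsigned long memblock_size = 256UL << 20;	/* one 256 MiB LMB */
	unsigned long start_pfn = 0x40000;		/* arbitrary base pfn */
	unsigned long sections_to_remove =
		(memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;

	for (unsigned long i = 0; i < sections_to_remove; i++) {
		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;

		/* the kernel would call __remove_pages(zone, pfn, ...) here */
		printf("section %2lu: pfn %#lx\n", i, pfn);
	}
	return 0;
}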
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c8af429991d9..ceff7aef2477 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,6 +68,7 @@ config S390
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_REGS_AND_STACK_ACCESS_API
@@ -80,6 +81,7 @@ config S390
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
@@ -126,6 +128,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select HAVE_UID16 if 32BIT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d6e6e380564..593753ee07f3 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file,
 }
 
 #define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#define arch_clear_hugepage_flags(page)	do { } while (0)
 
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
@@ -77,23 +78,6 @@ static inline void __pmd_csp(pmd_t *pmdp)
 		"	csp %1,%3"
 		: "=m" (*pmdp)
 		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto = (unsigned long) pmdp -
-		pmd_index(address) * sizeof(pmd_t);
-
-	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
-		asm volatile(
-			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-			: "=m" (*pmdp)
-			: "m" (*pmdp), "a" (sto),
-			  "a" ((address & HPAGE_MASK))
-		);
-	}
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
 static inline void huge_ptep_invalidate(struct mm_struct *mm,
@@ -105,6 +89,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
 		__pmd_idte(address, pmdp);
 	else
 		__pmd_csp(pmdp);
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6bd7d7483017..979fe3dc0788 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,6 +42,7 @@ extern void fault_init(void);
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep)	do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep)	do { } while (0)
 
 /*
  * ZERO_PAGE is a global shared page that is always zero; used
@@ -347,6 +348,12 @@ extern struct page *vmemmap;
 
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number */
+#define _SEGMENT_ENTRY_SPLIT	(1UL << _SEGMENT_ENTRY_SPLIT_BIT)
+
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+				 | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
 
 /* Page status table bits for virtualization */
 #define RCP_ACC_BITS	0xf000000000000000UL
@@ -506,6 +513,30 @@ static inline int pmd_bad(pmd_t pmd)
 	return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
 }
 
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+	return 0;
+}
+
 static inline int pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -1159,6 +1190,185 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)
 
+static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+{
+	unsigned long sto = (unsigned long) pmdp -
+		pmd_index(address) * sizeof(pmd_t);
+
+	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
+			: "=m" (*pmdp)
+			: "m" (*pmdp), "a" (sto),
+			  "a" ((address & HPAGE_MASK))
+			: "cc"
+		);
+	}
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t entry)
+{
+	*pmdp = entry;
+}
+
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	unsigned long pgprot_pmd = 0;
+
+	if (pgprot_val(pgprot) & _PAGE_INVALID) {
+		if (pgprot_val(pgprot) & _PAGE_SWT)
+			pgprot_pmd |= _HPAGE_TYPE_NONE;
+		pgprot_pmd |= _SEGMENT_ENTRY_INV;
+	}
+	if (pgprot_val(pgprot) & _PAGE_RO)
+		pgprot_pmd |= _SEGMENT_ENTRY_RO;
+	return pgprot_pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	return pmd;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+	return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	/* No dirty bit in the segment table entry. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	/* No referenced bit in the segment table entry. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	/* No referenced bit in the segment table entry. */
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address, pmd_t *pmdp)
+{
+	unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
+	long tmp, rc;
+	int counter;
+
+	rc = 0;
+	if (MACHINE_HAS_RRBM) {
+		counter = PTRS_PER_PTE >> 6;
+		asm volatile(
+			"0:	.insn	rre,0xb9ae0000,%0,%3\n"	/* rrbm */
+			"	ogr	%1,%0\n"
+			"	la	%3,0(%4,%3)\n"
+			"	brct	%2,0b\n"
+			: "=&d" (tmp), "+&d" (rc), "+d" (counter),
+			  "+a" (pmd_addr)
+			: "a" (64 * 4096UL) : "cc");
+		rc = !!rc;
+	} else {
+		counter = PTRS_PER_PTE;
+		asm volatile(
+			"0:	rrbe	0,%2\n"
+			"	la	%2,0(%3,%2)\n"
+			"	brc	12,1f\n"
+			"	lhi	%0,1\n"
+			"1:	brct	%1,0b\n"
+			: "+d" (rc), "+d" (counter), "+a" (pmd_addr)
+			: "a" (4096UL) : "cc");
+	}
+	return rc;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+
+	__pmd_idte(address, pmdp);
+	pmd_clear(pmdp);
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp)
+{
+	return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp)
+{
+	__pmd_idte(address, pmdp);
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pmd_t __pmd;
+	pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+	return __pmd;
+}
+
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return MACHINE_HAS_HPAGE ? 1 : 0;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) >> HPAGE_SHIFT;
+	else
+		return pmd_val(pmd) >> PAGE_SHIFT;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * 31 bit swap entry format:
  * A page-table entry has some bits we have to treat in a special way.
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 87b47ca954f1..8cfd731a18d8 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -81,6 +81,7 @@ extern unsigned int s390_user_mode;
 #define MACHINE_FLAG_SPP	(1UL << 13)
 #define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
 #define MACHINE_FLAG_TE		(1UL << 15)
+#define MACHINE_FLAG_RRBM	(1UL << 16)
 
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -99,7 +100,8 @@ extern unsigned int s390_user_mode;
 #define MACHINE_HAS_PFMF	(0)
 #define MACHINE_HAS_SPP		(0)
 #define MACHINE_HAS_TOPOLOGY	(0)
 #define MACHINE_HAS_TE		(0)
+#define MACHINE_HAS_RRBM	(0)
 #else /* CONFIG_64BIT */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
@@ -112,6 +114,7 @@ extern unsigned int s390_user_mode;
 #define MACHINE_HAS_SPP		(S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_RRBM	(S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
 #endif /* CONFIG_64BIT */
 
 #define ZFCPDUMP_HSA_SIZE	(32UL<<20)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 06e5acbc84bd..b75d7d686684 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -137,6 +137,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 #define tlb_start_vma(tlb, vma)			do { } while (0)
 #define tlb_end_vma(tlb, vma)			do { } while (0)
 #define tlb_remove_tlb_entry(tlb, ptep, addr)	do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr)	do { } while (0)
 #define tlb_migrate_finish(mm)			do { } while (0)
 
 #endif /* _S390_TLB_H */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 7f4717675c19..00d114445068 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -388,6 +388,8 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
 	if (test_facility(50) && test_facility(73))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+	if (test_facility(66))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
 #endif
 }
 
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122ca1152..04ad4001a289 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			down_read(&mm->mmap_sem);
 			goto retry;
 		}
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index eeaf8023851f..60acb93a4680 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		pmd = *pmdp;
 		barrier();
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
+		/*
+		 * The pmd_trans_splitting() check below explains why
+		 * pmdp_splitting_flush() has to serialize with
+		 * smp_call_function() against our disabled IRQs, to stop
+		 * this gup-fast code from running while we set the
+		 * splitting bit in the pmd. Returning zero will take
+		 * the slow path that will call wait_split_huge_page()
+		 * if the pmd is still in splitting state.
+		 */
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_huge(pmd))) {
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b402991e43d7..c8188a18af05 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -787,6 +787,30 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	tlb_table_flush(tlb);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void thp_split_vma(struct vm_area_struct *vma)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		page = follow_page(vma, addr, FOLL_SPLIT);
+	}
+}
+
+void thp_split_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma = mm->mmap;
+
+	while (vma != NULL) {
+		thp_split_vma(vma);
+		vma->vm_flags &= ~VM_HUGEPAGE;
+		vma->vm_flags |= VM_NOHUGEPAGE;
+		vma = vma->vm_next;
+	}
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -824,6 +848,12 @@ int s390_enable_sie(void)
 	if (!mm)
 		return -ENOMEM;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* split thp mappings and disable thp for future mappings */
+	thp_split_mm(mm);
+	mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
 	/* Now lets check again if something happened */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
@@ -866,3 +896,81 @@ bool kernel_page_present(struct page *page)
 	return cc == 0;
 }
 #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	/* No need to flush TLB
+	 * On s390 reference bits are in storage key and never in TLB */
+	return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (pmd_same(*pmdp, entry))
+		return 0;
+	pmdp_invalidate(vma, address, pmdp);
+	set_pmd_at(vma->vm_mm, address, pmdp, entry);
+	return 1;
+}
+
+static void pmdp_splitting_flush_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+			      (unsigned long *) pmdp)) {
+		/* need to serialize against gup-fast (IRQ disabled) */
+		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
+	}
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!mm->pmd_huge_pte)
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+	mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+	pte_t *ptep;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = mm->pmd_huge_pte;
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		mm->pmd_huge_pte = NULL;
+	else {
+		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	ptep = (pte_t *) pgtable;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	ptep++;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
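
The deposit/withdraw pair above parks preallocated page-table pages on a per-mm list, reusing the first words of each parked page as its struct list_head, so parking needs no extra allocation. A userspace sketch of that trick follows; the names and the 4 KiB size are assumptions for the demo, and the locking plus pte poisoning of the real code are omitted:

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void *pmd_huge_pte;		/* stand-in for mm->pmd_huge_pte */

static void deposit(void *pgtable)
{
	struct list_head *lh = pgtable;	/* page memory doubles as the node */

	if (!pmd_huge_pte) {
		lh->next = lh->prev = lh;	/* first page: singleton ring */
	} else {
		struct list_head *head = pmd_huge_pte;

		/* insert right after the current head, as list_add() does */
		lh->next = head->next;
		lh->prev = head;
		head->next->prev = lh;
		head->next = lh;
	}
	pmd_huge_pte = pgtable;		/* newest page becomes the head */
}

static void *withdraw(void)
{
	struct list_head *lh = pmd_huge_pte;

	if (lh->next == lh) {		/* last page on the ring */
		pmd_huge_pte = NULL;
	} else {
		pmd_huge_pte = lh->next;
		lh->prev->next = lh->next;	/* unlink, as list_del() does */
		lh->next->prev = lh->prev;
	}
	return lh;
}

int main(void)
{
	void *p1 = calloc(1, 4096), *p2 = calloc(1, 4096);
	void *a, *b;

	deposit(p1);
	deposit(p2);
	a = withdraw();
	b = withdraw();
	printf("withdrew %p then %p\n", a, b);
	free(p1);
	free(p2);
	return 0;
}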
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e8041..3b3e27a3ff2c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -13,14 +13,17 @@ config SUPERH
 	select HAVE_DMA_ATTRS
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_XZ
 	select HAVE_KERNEL_LZO
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 967068fb79ac..b3808c7d67b2 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_SH_HUGETLB_H
 #define _ASM_SH_HUGETLB_H
 
+#include <asm/cacheflush.h>
 #include <asm/page.h>
 
 
@@ -89,4 +90,9 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
 #endif /* _ASM_SH_HUGETLB_H */
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 3bdc1ad9a341..cbbdcad8fcb3 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -504,6 +504,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e1..91c780c973ba 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -18,6 +18,7 @@ config SPARC
 	select HAVE_OPROFILE
 	select HAVE_ARCH_KGDB if !SMP || SPARC64
 	select HAVE_ARCH_TRACEHOOK
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select RTC_CLASS
 	select RTC_DRV_M48T59
@@ -32,6 +33,7 @@ config SPARC
 	select GENERIC_PCI_IOMAP
 	select HAVE_NMI_WATCHDOG if SPARC64
 	select HAVE_BPF_JIT
+	select HAVE_DEBUG_BUGVERBOSE
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CLOCKEVENTS
@@ -42,6 +44,7 @@ config SPARC32
 	def_bool !64BIT
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
+	select HAVE_UID16
 
 config SPARC64
 	def_bool 64BIT
@@ -59,6 +62,7 @@ config SPARC64
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_DEBUG_KMEMLEAK
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
@@ -226,25 +230,6 @@ config EARLYFB
 	help
 	  Say Y here to enable a faster early framebuffer boot console.
 
-choice
-	prompt "Kernel page size" if SPARC64
-	default SPARC64_PAGE_SIZE_8KB
-
-config SPARC64_PAGE_SIZE_8KB
-	bool "8KB"
-	help
-	  This lets you select the page size of the kernel.
-
-	  8KB and 64KB work quite well, since SPARC ELF sections
-	  provide for up to 64KB alignment.
-
-	  If you don't know what to do, choose 8KB.
-
-config SPARC64_PAGE_SIZE_64KB
-	bool "64KB"
-
-endchoice
-
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on SPARC64 && PROC_FS
@@ -316,23 +301,6 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SPARC64 && SMP && PREEMPT
 
-choice
-	prompt "SPARC64 Huge TLB Page Size"
-	depends on SPARC64 && HUGETLB_PAGE
-	default HUGETLB_PAGE_SIZE_4MB
-
-config HUGETLB_PAGE_SIZE_4MB
-	bool "4MB"
-
-config HUGETLB_PAGE_SIZE_512K
-	bool "512K"
-
-config HUGETLB_PAGE_SIZE_64K
-	depends on !SPARC64_PAGE_SIZE_64KB
-	bool "64K"
-
-endchoice
-
 config NUMA
 	bool "NUMA support"
 	depends on SPARC64 && SMP
@@ -571,6 +539,7 @@ config COMPAT
 	depends on SPARC64
 	default y
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 177061064ee6..8c5eed6d267f 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
10pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 10pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
11 pte_t *ptep); 11 pte_t *ptep);
12 12
13void hugetlb_prefault_arch_hook(struct mm_struct *mm); 13static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
14{
15 hugetlb_setup(mm);
16}
14 17
15static inline int is_hugepage_only_range(struct mm_struct *mm, 18static inline int is_hugepage_only_range(struct mm_struct *mm,
16 unsigned long addr, 19 unsigned long addr,
@@ -82,4 +85,8 @@ static inline void arch_release_hugepage(struct page *page)
82{ 85{
83} 86}
84 87
88static inline void arch_clear_hugepage_flags(struct page *page)
89{
90}
91
85#endif /* _ASM_SPARC64_HUGETLB_H */ 92#endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 9067dc500535..76092c4dd277 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -30,22 +30,8 @@
30#define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \ 30#define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
31 (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT)) 31 (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
32 32
33#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
34#define CTX_PGSZ_BASE CTX_PGSZ_8KB 33#define CTX_PGSZ_BASE CTX_PGSZ_8KB
35#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) 34#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
36#define CTX_PGSZ_BASE CTX_PGSZ_64KB
37#else
38#error No page size specified in kernel configuration
39#endif
40
41#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
42#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
43#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
44#define CTX_PGSZ_HUGE CTX_PGSZ_512KB
45#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
46#define CTX_PGSZ_HUGE CTX_PGSZ_64KB
47#endif
48
49#define CTX_PGSZ_KERN CTX_PGSZ_4MB 35#define CTX_PGSZ_KERN CTX_PGSZ_4MB
50 36
51/* Thus, when running on UltraSPARC-III+ and later, we use the following 37/* Thus, when running on UltraSPARC-III+ and later, we use the following
@@ -96,7 +82,7 @@ struct tsb_config {
96 82
97#define MM_TSB_BASE 0 83#define MM_TSB_BASE 0
98 84
99#ifdef CONFIG_HUGETLB_PAGE 85#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
100#define MM_TSB_HUGE 1 86#define MM_TSB_HUGE 1
101#define MM_NUM_TSBS 2 87#define MM_NUM_TSBS 2
102#else 88#else
@@ -107,6 +93,7 @@ typedef struct {
107 spinlock_t lock; 93 spinlock_t lock;
108 unsigned long sparc64_ctx_val; 94 unsigned long sparc64_ctx_val;
109 unsigned long huge_pte_count; 95 unsigned long huge_pte_count;
96 struct page *pgtable_page;
110 struct tsb_config tsb_block[MM_NUM_TSBS]; 97 struct tsb_config tsb_block[MM_NUM_TSBS];
111 struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; 98 struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
112} mm_context_t; 99} mm_context_t;
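With the base and huge page sizes no longer configurable, CTX_PGSZ_BASE and CTX_PGSZ_HUGE collapse to compile-time constants (8KB and 4MB). As an illustrative sketch only (this helper is not in the patch), the two fields combine into a context register value the same way hugetlb_setup() in init_64.c does further down:

	/* Hypothetical helper: pack the fixed base/huge page sizes into
	 * a sparc64 context register value.  CTX_PGSZ0 selects the page
	 * size for the base (8KB) TSB, CTX_PGSZ1 for the huge (4MB) TSB.
	 */
	static unsigned long ctx_with_page_sizes(unsigned long ctx)
	{
		ctx &= ~CTX_PGSZ_MASK;
		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
		return ctx;
	}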
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index a97fd085cebe..9191ca62ed9c 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
36{ 36{
37 __tsb_context_switch(__pa(mm->pgd), 37 __tsb_context_switch(__pa(mm->pgd),
38 &mm->context.tsb_block[0], 38 &mm->context.tsb_block[0],
39#ifdef CONFIG_HUGETLB_PAGE 39#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
40 (mm->context.tsb_block[1].tsb ? 40 (mm->context.tsb_block[1].tsb ?
41 &mm->context.tsb_block[1] : 41 &mm->context.tsb_block[1] :
42 NULL) 42 NULL)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index f0d09b401036..4b39f74d6ca0 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -3,13 +3,7 @@
3 3
4#include <linux/const.h> 4#include <linux/const.h>
5 5
6#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
7#define PAGE_SHIFT 13 6#define PAGE_SHIFT 13
8#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
9#define PAGE_SHIFT 16
10#else
11#error No page size specified in kernel configuration
12#endif
13 7
14#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
15#define PAGE_MASK (~(PAGE_SIZE-1)) 9#define PAGE_MASK (~(PAGE_SIZE-1))
@@ -21,15 +15,9 @@
21#define DCACHE_ALIASING_POSSIBLE 15#define DCACHE_ALIASING_POSSIBLE
22#endif 16#endif
23 17
24#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
25#define HPAGE_SHIFT 22 18#define HPAGE_SHIFT 22
26#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
27#define HPAGE_SHIFT 19
28#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
29#define HPAGE_SHIFT 16
30#endif
31 19
32#ifdef CONFIG_HUGETLB_PAGE 20#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
33#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) 21#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
34#define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) 22#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
35#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 23#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -38,6 +26,11 @@
38 26
39#ifndef __ASSEMBLY__ 27#ifndef __ASSEMBLY__
40 28
29#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
30struct mm_struct;
31extern void hugetlb_setup(struct mm_struct *mm);
32#endif
33
41#define WANT_PAGE_VIRTUAL 34#define WANT_PAGE_VIRTUAL
42 35
43extern void _clear_page(void *page); 36extern void _clear_page(void *page);
@@ -98,7 +91,7 @@ typedef unsigned long pgprot_t;
98 91
99#endif /* (STRICT_MM_TYPECHECKS) */ 92#endif /* (STRICT_MM_TYPECHECKS) */
100 93
101typedef struct page *pgtable_t; 94typedef pte_t *pgtable_t;
102 95
103#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ 96#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
104 (_AC(0x0000000070000000,UL)) : \ 97 (_AC(0x0000000070000000,UL)) : \
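The page geometry is now fixed rather than configured, and pgtable_t changes from struct page * to pte_t *, which the pgalloc_64.h and init_64.c hunks below rely on. A worked sketch of the derived constants (hypothetical compile-time checks, not part of the patch; the values follow directly from the defines above):

	/* Illustrative checks of the now-fixed geometry. */
	static inline void check_fixed_page_geometry(void)
	{
		BUILD_BUG_ON(PAGE_SIZE != (1UL << 13));	  /* 8KB base page */
		BUILD_BUG_ON(HPAGE_SIZE != (1UL << 22));  /* 4MB huge page */
		/* HUGETLB_PAGE_ORDER = 22 - 13 = 9, i.e. one huge page
		 * spans 1UL << 9 = 512 base pages.
		 */
		BUILD_BUG_ON(HUGETLB_PAGE_ORDER != 9);
	}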
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 40b2d7a7023d..bcfe063bce23 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -38,51 +38,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
38 kmem_cache_free(pgtable_cache, pmd); 38 kmem_cache_free(pgtable_cache, pmd);
39} 39}
40 40
41static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 41extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
42 unsigned long address) 42 unsigned long address);
43{ 43extern pgtable_t pte_alloc_one(struct mm_struct *mm,
44 return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); 44 unsigned long address);
45} 45extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
46 46extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);
47static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
48 unsigned long address)
49{
50 struct page *page;
51 pte_t *pte;
52
53 pte = pte_alloc_one_kernel(mm, address);
54 if (!pte)
55 return NULL;
56 page = virt_to_page(pte);
57 pgtable_page_ctor(page);
58 return page;
59}
60
61static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
62{
63 free_page((unsigned long)pte);
64}
65
66static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
67{
68 pgtable_page_dtor(ptepage);
69 __free_page(ptepage);
70}
71 47
72#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) 48#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
73#define pmd_populate(MM,PMD,PTE_PAGE) \ 49#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
74 pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) 50#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))
75#define pmd_pgtable(pmd) pmd_page(pmd)
76 51
77#define check_pgt_cache() do { } while (0) 52#define check_pgt_cache() do { } while (0)
78 53
79static inline void pgtable_free(void *table, bool is_page) 54extern void pgtable_free(void *table, bool is_page);
80{
81 if (is_page)
82 free_page((unsigned long)table);
83 else
84 kmem_cache_free(pgtable_cache, table);
85}
86 55
87#ifdef CONFIG_SMP 56#ifdef CONFIG_SMP
88 57
@@ -113,11 +82,10 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is
113} 82}
114#endif /* !CONFIG_SMP */ 83#endif /* !CONFIG_SMP */
115 84
116static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage, 85static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
117 unsigned long address) 86 unsigned long address)
118{ 87{
119 pgtable_page_dtor(ptepage); 88 pgtable_free_tlb(tlb, pte, true);
120 pgtable_free_tlb(tlb, page_address(ptepage), true);
121} 89}
122 90
123#define __pmd_free_tlb(tlb, pmd, addr) \ 91#define __pmd_free_tlb(tlb, pmd, addr) \
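pmd_populate() and pmd_pgtable() now operate on pte_t * directly, and a pmd entry stores a shifted physical address rather than a page pointer. A round-trip sketch (hypothetical helper; PMD_PADDR_SHIFT comes from the pgtable_64.h hunk below, and the shift is lossless because pte tables are 4K aligned):

	/* Hypothetical round trip: pmd_set() stores the pte table's
	 * physical address >> PMD_PADDR_SHIFT (11) in the 32-bit pmd,
	 * pmd_pgtable() shifts it back and returns the virtual address.
	 */
	static pte_t *pmd_round_trip(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
	{
		pmd_set(mm, pmdp, ptep);	/* stores __pa(ptep) >> 11 */
		return pmd_pgtable(*pmdp);	/* recovers ptep */
	}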
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 61210db139fb..95515f1e7cef 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -45,40 +45,59 @@
45 45
46#define vmemmap ((struct page *)VMEMMAP_BASE) 46#define vmemmap ((struct page *)VMEMMAP_BASE)
47 47
48/* XXX All of this needs to be rethought so we can take advantage
49 * XXX cheetah's full 64-bit virtual address space, ie. no more hole
50 * XXX in the middle like on spitfire. -DaveM
51 */
52/*
53 * Given a virtual address, the lowest PAGE_SHIFT bits determine offset
54 * into the page; the next higher PAGE_SHIFT-3 bits determine the pte#
55 * in the proper pagetable (the -3 is from the 8 byte ptes, and each page
56 * table is a single page long). The next higher PMD_BITS determine pmd#
57 * in the proper pmdtable (where we must have PMD_BITS <= (PAGE_SHIFT-2)
58 * since the pmd entries are 4 bytes, and each pmd page is a single page
59 * long). Finally, the higher few bits determine pgde#.
60 */
61
62/* PMD_SHIFT determines the size of the area a second-level page 48/* PMD_SHIFT determines the size of the area a second-level page
63 * table can map 49 * table can map
64 */ 50 */
65#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) 51#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4))
66#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) 52#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
67#define PMD_MASK (~(PMD_SIZE-1)) 53#define PMD_MASK (~(PMD_SIZE-1))
68#define PMD_BITS (PAGE_SHIFT - 2) 54#define PMD_BITS (PAGE_SHIFT - 2)
69 55
70/* PGDIR_SHIFT determines what a third-level page table entry can map */ 56/* PGDIR_SHIFT determines what a third-level page table entry can map */
71#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) 57#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
72#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) 58#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
73#define PGDIR_MASK (~(PGDIR_SIZE-1)) 59#define PGDIR_MASK (~(PGDIR_SIZE-1))
74#define PGDIR_BITS (PAGE_SHIFT - 2) 60#define PGDIR_BITS (PAGE_SHIFT - 2)
75 61
62#if (PGDIR_SHIFT + PGDIR_BITS) != 44
63#error Page table parameters do not cover virtual address space properly.
64#endif
65
66#if (PMD_SHIFT != HPAGE_SHIFT)
67#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
68#endif
69
70/* PMDs point to PTE tables which are 4K aligned. */
71#define PMD_PADDR _AC(0xfffffffe,UL)
72#define PMD_PADDR_SHIFT _AC(11,UL)
73
74#ifdef CONFIG_TRANSPARENT_HUGEPAGE
75#define PMD_ISHUGE _AC(0x00000001,UL)
76
77/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
78 * pages, this frees up a bunch of bits in the layout that we can
79 * use for the protection settings and software metadata.
80 */
81#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
82#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
83#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
84#define PMD_HUGE_WRITE _AC(0x00000200,UL)
85#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
86#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
87#define PMD_HUGE_EXEC _AC(0x00000040,UL)
88#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
89#endif
90
91/* PGDs point to PMD tables which are 8K aligned. */
92#define PGD_PADDR _AC(0xfffffffc,UL)
93#define PGD_PADDR_SHIFT _AC(11,UL)
94
76#ifndef __ASSEMBLY__ 95#ifndef __ASSEMBLY__
77 96
78#include <linux/sched.h> 97#include <linux/sched.h>
79 98
80/* Entries per page directory level. */ 99/* Entries per page directory level. */
81#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) 100#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4))
82#define PTRS_PER_PMD (1UL << PMD_BITS) 101#define PTRS_PER_PMD (1UL << PMD_BITS)
83#define PTRS_PER_PGD (1UL << PGDIR_BITS) 102#define PTRS_PER_PGD (1UL << PGDIR_BITS)
84 103
@@ -160,26 +179,11 @@
160#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ 179#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
161#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */ 180#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */
162 181
163#if PAGE_SHIFT == 13
164#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U 182#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
165#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V 183#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
166#elif PAGE_SHIFT == 16
167#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
168#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
169#else
170#error Wrong PAGE_SHIFT specified
171#endif
172 184
173#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
174#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U 185#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
175#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V 186#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
176#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
177#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
178#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
179#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
180#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
181#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
182#endif
183 187
184/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ 188/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
185#define __P000 __pgprot(0) 189#define __P000 __pgprot(0)
@@ -218,7 +222,6 @@ extern unsigned long _PAGE_CACHE;
218 222
219extern unsigned long pg_iobits; 223extern unsigned long pg_iobits;
220extern unsigned long _PAGE_ALL_SZ_BITS; 224extern unsigned long _PAGE_ALL_SZ_BITS;
221extern unsigned long _PAGE_SZBITS;
222 225
223extern struct page *mem_map_zero; 226extern struct page *mem_map_zero;
224#define ZERO_PAGE(vaddr) (mem_map_zero) 227#define ZERO_PAGE(vaddr) (mem_map_zero)
@@ -231,25 +234,25 @@ extern struct page *mem_map_zero;
231static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) 234static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
232{ 235{
233 unsigned long paddr = pfn << PAGE_SHIFT; 236 unsigned long paddr = pfn << PAGE_SHIFT;
234 unsigned long sz_bits; 237
235 238 BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
236 sz_bits = 0UL; 239 return __pte(paddr | pgprot_val(prot));
237 if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
238 __asm__ __volatile__(
239 "\n661: sethi %%uhi(%1), %0\n"
240 " sllx %0, 32, %0\n"
241 " .section .sun4v_2insn_patch, \"ax\"\n"
242 " .word 661b\n"
243 " mov %2, %0\n"
244 " nop\n"
245 " .previous\n"
246 : "=r" (sz_bits)
247 : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
248 }
249 return __pte(paddr | sz_bits | pgprot_val(prot));
250} 240}
251#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) 241#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
252 242
243#ifdef CONFIG_TRANSPARENT_HUGEPAGE
244extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
245#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
246
247extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
248
249static inline pmd_t pmd_mkhuge(pmd_t pmd)
250{
251 /* Do nothing, mk_pmd() does this part. */
252 return pmd;
253}
254#endif
255
253/* This one can be done with two shifts. */ 256/* This one can be done with two shifts. */
254static inline unsigned long pte_pfn(pte_t pte) 257static inline unsigned long pte_pfn(pte_t pte)
255{ 258{
@@ -286,6 +289,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
286 * Note: We encode this into 3 sun4v 2-insn patch sequences. 289 * Note: We encode this into 3 sun4v 2-insn patch sequences.
287 */ 290 */
288 291
292 BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
289 __asm__ __volatile__( 293 __asm__ __volatile__(
290 "\n661: sethi %%uhi(%2), %1\n" 294 "\n661: sethi %%uhi(%2), %1\n"
291 " sethi %%hi(%2), %0\n" 295 " sethi %%hi(%2), %0\n"
@@ -307,10 +311,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
307 : "=r" (mask), "=r" (tmp) 311 : "=r" (mask), "=r" (tmp)
308 : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | 312 : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
309 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | 313 _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
310 _PAGE_SZBITS_4U | _PAGE_SPECIAL), 314 _PAGE_SPECIAL),
311 "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | 315 "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
312 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | 316 _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
313 _PAGE_SZBITS_4V | _PAGE_SPECIAL)); 317 _PAGE_SPECIAL));
314 318
315 return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); 319 return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
316} 320}
@@ -618,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
618 return pte_val(pte) & _PAGE_SPECIAL; 622 return pte_val(pte) & _PAGE_SPECIAL;
619} 623}
620 624
621#define pmd_set(pmdp, ptep) \ 625#ifdef CONFIG_TRANSPARENT_HUGEPAGE
622 (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) 626static inline int pmd_young(pmd_t pmd)
627{
628 return pmd_val(pmd) & PMD_HUGE_ACCESSED;
629}
630
631static inline int pmd_write(pmd_t pmd)
632{
633 return pmd_val(pmd) & PMD_HUGE_WRITE;
634}
635
636static inline unsigned long pmd_pfn(pmd_t pmd)
637{
638 unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
639
640 return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
641}
642
643static inline int pmd_large(pmd_t pmd)
644{
645 return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
646 (PMD_ISHUGE | PMD_HUGE_PRESENT);
647}
648
649static inline int pmd_trans_splitting(pmd_t pmd)
650{
651 return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
652 (PMD_ISHUGE|PMD_HUGE_SPLITTING);
653}
654
655static inline int pmd_trans_huge(pmd_t pmd)
656{
657 return pmd_val(pmd) & PMD_ISHUGE;
658}
659
660#define has_transparent_hugepage() 1
661
662static inline pmd_t pmd_mkold(pmd_t pmd)
663{
664 pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
665 return pmd;
666}
667
668static inline pmd_t pmd_wrprotect(pmd_t pmd)
669{
670 pmd_val(pmd) &= ~PMD_HUGE_WRITE;
671 return pmd;
672}
673
674static inline pmd_t pmd_mkdirty(pmd_t pmd)
675{
676 pmd_val(pmd) |= PMD_HUGE_DIRTY;
677 return pmd;
678}
679
680static inline pmd_t pmd_mkyoung(pmd_t pmd)
681{
682 pmd_val(pmd) |= PMD_HUGE_ACCESSED;
683 return pmd;
684}
685
686static inline pmd_t pmd_mkwrite(pmd_t pmd)
687{
688 pmd_val(pmd) |= PMD_HUGE_WRITE;
689 return pmd;
690}
691
692static inline pmd_t pmd_mknotpresent(pmd_t pmd)
693{
694 pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
695 return pmd;
696}
697
698static inline pmd_t pmd_mksplitting(pmd_t pmd)
699{
700 pmd_val(pmd) |= PMD_HUGE_SPLITTING;
701 return pmd;
702}
703
704extern pgprot_t pmd_pgprot(pmd_t entry);
705#endif
706
707static inline int pmd_present(pmd_t pmd)
708{
709 return pmd_val(pmd) != 0U;
710}
711
712#define pmd_none(pmd) (!pmd_val(pmd))
713
714#ifdef CONFIG_TRANSPARENT_HUGEPAGE
715extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
716 pmd_t *pmdp, pmd_t pmd);
717#else
718static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
719 pmd_t *pmdp, pmd_t pmd)
720{
721 *pmdp = pmd;
722}
723#endif
724
725static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
726{
727 unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
728
729 pmd_val(*pmdp) = val;
730}
731
623#define pud_set(pudp, pmdp) \ 732#define pud_set(pudp, pmdp) \
624 (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> 11UL)) 733 (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
625#define __pmd_page(pmd) \ 734static inline unsigned long __pmd_page(pmd_t pmd)
626 ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL))) 735{
736 unsigned long paddr = (unsigned long) pmd_val(pmd);
737#ifdef CONFIG_TRANSPARENT_HUGEPAGE
738 if (pmd_val(pmd) & PMD_ISHUGE)
739 paddr &= PMD_HUGE_PADDR;
740#endif
741 paddr <<= PMD_PADDR_SHIFT;
742 return ((unsigned long) __va(paddr));
743}
627#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) 744#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
628#define pud_page_vaddr(pud) \ 745#define pud_page_vaddr(pud) \
629 ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) 746 ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
630#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) 747#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
631#define pmd_none(pmd) (!pmd_val(pmd))
632#define pmd_bad(pmd) (0) 748#define pmd_bad(pmd) (0)
633#define pmd_present(pmd) (pmd_val(pmd) != 0U)
634#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U) 749#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
635#define pud_none(pud) (!pud_val(pud)) 750#define pud_none(pud) (!pud_val(pud))
636#define pud_bad(pud) (0) 751#define pud_bad(pud) (0)
@@ -664,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
664extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, 779extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
665 pte_t *ptep, pte_t orig, int fullmm); 780 pte_t *ptep, pte_t orig, int fullmm);
666 781
782#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
783static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
784 unsigned long addr,
785 pmd_t *pmdp)
786{
787 pmd_t pmd = *pmdp;
788 set_pmd_at(mm, addr, pmdp, __pmd(0U));
789 return pmd;
790}
791
667static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, 792static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
668 pte_t *ptep, pte_t pte, int fullmm) 793 pte_t *ptep, pte_t pte, int fullmm)
669{ 794{
@@ -719,6 +844,16 @@ extern void mmu_info(struct seq_file *);
719 844
720struct vm_area_struct; 845struct vm_area_struct;
721extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); 846extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
847#ifdef CONFIG_TRANSPARENT_HUGEPAGE
848extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
849 pmd_t *pmd);
850
851#define __HAVE_ARCH_PGTABLE_DEPOSIT
852extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
853
854#define __HAVE_ARCH_PGTABLE_WITHDRAW
855extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
856#endif
722 857
723/* Encode and de-code a swap entry */ 858/* Encode and de-code a swap entry */
724#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL) 859#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
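In the huge-PMD layout above, the low eleven bits (PMD_HUGE_PROTBITS) carry protection and software state while the remaining bits hold the physical address. A decoding sketch (hypothetical helper, equivalent to pmd_pfn() above since masking then shifting left by 11 and right by 13 is the same as shifting right by 2):

	/* Hypothetical decode of a huge pmd, spelled out in two steps:
	 * mask off the low protection/software bits, widen by
	 * PMD_PADDR_SHIFT (11) to a physical address, then reduce by
	 * PAGE_SHIFT (13) to a pfn.
	 */
	static unsigned long huge_pmd_pfn(pmd_t pmd)
	{
		unsigned long paddr;

		paddr = (pmd_val(pmd) & PMD_HUGE_PADDR) << PMD_PADDR_SHIFT;
		return paddr >> PAGE_SHIFT;
	}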
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 1a8afd1ad04f..b4c258de4443 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -147,20 +147,96 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
147 brz,pn REG1, FAIL_LABEL; \ 147 brz,pn REG1, FAIL_LABEL; \
148 sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ 148 sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
149 srlx REG2, 64 - PAGE_SHIFT, REG2; \ 149 srlx REG2, 64 - PAGE_SHIFT, REG2; \
150 sllx REG1, 11, REG1; \ 150 sllx REG1, PGD_PADDR_SHIFT, REG1; \
151 andn REG2, 0x3, REG2; \ 151 andn REG2, 0x3, REG2; \
152 lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ 152 lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
153 brz,pn REG1, FAIL_LABEL; \ 153 brz,pn REG1, FAIL_LABEL; \
154 sllx VADDR, 64 - PMD_SHIFT, REG2; \ 154 sllx VADDR, 64 - PMD_SHIFT, REG2; \
155 srlx REG2, 64 - PAGE_SHIFT, REG2; \ 155 srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
156 sllx REG1, 11, REG1; \ 156 sllx REG1, PMD_PADDR_SHIFT, REG1; \
157 andn REG2, 0x7, REG2; \ 157 andn REG2, 0x7, REG2; \
158 add REG1, REG2, REG1; 158 add REG1, REG2, REG1;
159 159
160 /* Do a user page table walk in MMU globals. Leaves physical PTE 160 /* This macro exists only to make the PMD translator below easier
161 * pointer in REG1. Jumps to FAIL_LABEL on early page table walk 161 * to read. It hides the ELF section switch for the sun4v code
162 * termination. Physical base of page tables is in PHYS_PGD which 162 * patching.
163 * will not be modified. 163 */
164#define OR_PTE_BIT(REG, NAME) \
165661: or REG, _PAGE_##NAME##_4U, REG; \
166 .section .sun4v_1insn_patch, "ax"; \
167 .word 661b; \
168 or REG, _PAGE_##NAME##_4V, REG; \
169 .previous;
170
171 /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
172#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
173661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
174 .section .sun4v_1insn_patch, "ax"; \
175 .word 661b; \
176 sethi %uhi(_PAGE_VALID), REG; \
177 .previous; \
178 sllx REG, 32, REG; \
179661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
180 .section .sun4v_1insn_patch, "ax"; \
181 .word 661b; \
182 or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
183 .previous;
184
185 /* PMD has been loaded into REG1, interpret the value, seeing
186 * if it is a HUGE PMD or a normal one. If it is not valid
187 * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
188 * translates to a valid PTE, branch to PTE_LABEL.
189 *
190 * We translate the PMD by hand, one bit at a time,
191 * constructing the huge PTE.
192 *
193 * So we construct the PTE in REG2 as follows:
194 *
195 * 1) Extract the PMD PFN from REG1 and place it into REG2.
196 *
197 * 2) Translate PMD protection bits in REG1 into REG2, one bit
198 * at a time using andcc tests on REG1 and OR's into REG2.
199 *
200 * Only two bits to be concerned with here, EXEC and WRITE.
201 * Now REG1 is freed up and we can use it as a temporary.
202 *
203 * 3) Construct the VALID, CACHE, and page size PTE bits in
204 * REG1, OR with REG2 to form final PTE.
205 */
206#ifdef CONFIG_TRANSPARENT_HUGEPAGE
207#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
208 brz,pn REG1, FAIL_LABEL; \
209 andcc REG1, PMD_ISHUGE, %g0; \
210 be,pt %xcc, 700f; \
211 and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
212 cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
213 bne,pn %xcc, FAIL_LABEL; \
214 andn REG1, PMD_HUGE_PROTBITS, REG2; \
215 sllx REG2, PMD_PADDR_SHIFT, REG2; \
216 /* REG2 now holds PFN << PAGE_SHIFT */ \
217 andcc REG1, PMD_HUGE_EXEC, %g0; \
218 bne,a,pt %xcc, 1f; \
219 OR_PTE_BIT(REG2, EXEC); \
2201: andcc REG1, PMD_HUGE_WRITE, %g0; \
221 bne,a,pt %xcc, 1f; \
222 OR_PTE_BIT(REG2, W); \
223 /* REG1 can now be clobbered, build final PTE */ \
2241: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
225 ba,pt %xcc, PTE_LABEL; \
226 or REG1, REG2, REG1; \
227700:
228#else
229#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
230 brz,pn REG1, FAIL_LABEL; \
231 nop;
232#endif
233
234 /* Do a user page table walk in MMU globals. Leaves final,
235 * valid, PTE value in REG1. Jumps to FAIL_LABEL on early
236 * page table walk termination or if the PTE is not valid.
237 *
238 * Physical base of page tables is in PHYS_PGD which will not
239 * be modified.
164 * 240 *
165 * VADDR will not be clobbered, but REG1 and REG2 will. 241 * VADDR will not be clobbered, but REG1 and REG2 will.
166 */ 242 */
@@ -172,15 +248,19 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
172 brz,pn REG1, FAIL_LABEL; \ 248 brz,pn REG1, FAIL_LABEL; \
173 sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ 249 sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
174 srlx REG2, 64 - PAGE_SHIFT, REG2; \ 250 srlx REG2, 64 - PAGE_SHIFT, REG2; \
175 sllx REG1, 11, REG1; \ 251 sllx REG1, PGD_PADDR_SHIFT, REG1; \
176 andn REG2, 0x3, REG2; \ 252 andn REG2, 0x3, REG2; \
177 lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ 253 lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
178 brz,pn REG1, FAIL_LABEL; \ 254 USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
179 sllx VADDR, 64 - PMD_SHIFT, REG2; \ 255 sllx VADDR, 64 - PMD_SHIFT, REG2; \
180 srlx REG2, 64 - PAGE_SHIFT, REG2; \ 256 srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
181 sllx REG1, 11, REG1; \ 257 sllx REG1, PMD_PADDR_SHIFT, REG1; \
182 andn REG2, 0x7, REG2; \ 258 andn REG2, 0x7, REG2; \
183 add REG1, REG2, REG1; 259 add REG1, REG2, REG1; \
260 ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
261 brgez,pn REG1, FAIL_LABEL; \
262 nop; \
263800:
184 264
185/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0. 265/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
186 * If no entry is found, FAIL_LABEL will be branched to. On success 266 * If no entry is found, FAIL_LABEL will be branched to. On success
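For readers who do not speak sparc assembler, a rough C model of USER_PGTABLE_CHECK_PMD_HUGE (illustrative only; the real macro branches to FAIL_LABEL/PTE_LABEL and patches between the _4U and _4V bit constants at boot, which is elided here by using the sun4v names):

	/* Rough C model of the huge-PMD translator above.  Returns 1
	 * with the constructed huge PTE in *ptep, 0 if the walk should
	 * continue down to the pte table, or -1 for the fault path.
	 */
	static int huge_pmd_to_pte(unsigned long pmd, unsigned long *ptep)
	{
		unsigned long pte;

		if (!(pmd & PMD_ISHUGE))
			return 0;	/* not huge: keep walking */
		if ((pmd & (PMD_HUGE_PRESENT | PMD_HUGE_ACCESSED)) !=
		    (PMD_HUGE_PRESENT | PMD_HUGE_ACCESSED))
			return -1;	/* take the fault path */

		/* Step 1: pfn bits, shifted up to a physical address. */
		pte = (pmd & ~PMD_HUGE_PROTBITS) << PMD_PADDR_SHIFT;

		/* Step 2: the two protection bits that need translating. */
		if (pmd & PMD_HUGE_EXEC)
			pte |= _PAGE_EXEC_4V;
		if (pmd & PMD_HUGE_WRITE)
			pte |= _PAGE_W_4V;

		/* Step 3: valid, cacheability and huge page size bits. */
		pte |= _PAGE_VALID | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_SZHUGE_4V;

		*ptep = pte;
		return 1;		/* huge PTE constructed */
	}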
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index acc8c838ff72..75b31bcdeadf 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
779static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, 779static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
780 enum pci_mmap_state mmap_state) 780 enum pci_mmap_state mmap_state)
781{ 781{
782 vma->vm_flags |= (VM_IO | VM_RESERVED); 782 vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
783} 783}
784 784
785/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci 785/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e1fbf8c75787..bde867fd71e8 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -176,7 +176,7 @@ sun4v_tsb_miss_common:
176 176
177 sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 177 sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
178 178
179#ifdef CONFIG_HUGETLB_PAGE 179#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
180 mov SCRATCHPAD_UTSBREG2, %g5 180 mov SCRATCHPAD_UTSBREG2, %g5
181 ldxa [%g5] ASI_SCRATCHPAD, %g5 181 ldxa [%g5] ASI_SCRATCHPAD, %g5
182 cmp %g5, -1 182 cmp %g5, -1
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index db15d123f054..d4bdc7a62375 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -49,7 +49,7 @@ tsb_miss_page_table_walk:
49 /* Before committing to a full page table walk, 49 /* Before committing to a full page table walk,
50 * check the huge page TSB. 50 * check the huge page TSB.
51 */ 51 */
52#ifdef CONFIG_HUGETLB_PAGE 52#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
53 53
54661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5 54661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
55 nop 55 nop
@@ -110,12 +110,9 @@ tsb_miss_page_table_walk:
110tsb_miss_page_table_walk_sun4v_fastpath: 110tsb_miss_page_table_walk_sun4v_fastpath:
111 USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) 111 USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
112 112
113 /* Load and check PTE. */ 113 /* Valid PTE is now in %g5. */
114 ldxa [%g5] ASI_PHYS_USE_EC, %g5
115 brgez,pn %g5, tsb_do_fault
116 nop
117 114
118#ifdef CONFIG_HUGETLB_PAGE 115#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
119661: sethi %uhi(_PAGE_SZALL_4U), %g7 116661: sethi %uhi(_PAGE_SZALL_4U), %g7
120 sllx %g7, 32, %g7 117 sllx %g7, 32, %g7
121 .section .sun4v_2insn_patch, "ax" 118 .section .sun4v_2insn_patch, "ax"
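The PTE load and validity check move out of tsb.S into the USER_PGTABLE_WALK_TL1 macro itself, so the walker now returns a loaded, validated PTE. The brgez,pn test it inherits is worth spelling out (sketch, not patch code):

	/* What brgez,pn on a freshly loaded PTE amounts to (sketch):
	 * _PAGE_VALID is bit 63, so a PTE that is non-negative as a
	 * signed 64-bit value has the valid bit clear and must fault.
	 */
	static int pte_takes_fault(unsigned long pte)
	{
		return (long) pte >= 0;
	}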
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917be152..e98bfda205a2 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@ good_area:
265 } 265 }
266 if (fault & VM_FAULT_RETRY) { 266 if (fault & VM_FAULT_RETRY) {
267 flags &= ~FAULT_FLAG_ALLOW_RETRY; 267 flags &= ~FAULT_FLAG_ALLOW_RETRY;
268 flags |= FAULT_FLAG_TRIED;
268 269
269 /* No need to up_read(&mm->mmap_sem) as we would 270 /* No need to up_read(&mm->mmap_sem) as we would
270 * have already released it in __lock_page_or_retry 271 * have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429b6314..2976dba1ebaf 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@ good_area:
452 } 452 }
453 if (fault & VM_FAULT_RETRY) { 453 if (fault & VM_FAULT_RETRY) {
454 flags &= ~FAULT_FLAG_ALLOW_RETRY; 454 flags &= ~FAULT_FLAG_ALLOW_RETRY;
455 flags |= FAULT_FLAG_TRIED;
455 456
456 /* No need to up_read(&mm->mmap_sem) as we would 457 /* No need to up_read(&mm->mmap_sem) as we would
457 * have already released it in __lock_page_or_retry 458 * have already released it in __lock_page_or_retry
@@ -464,13 +465,13 @@ good_area:
464 up_read(&mm->mmap_sem); 465 up_read(&mm->mmap_sem);
465 466
466 mm_rss = get_mm_rss(mm); 467 mm_rss = get_mm_rss(mm);
467#ifdef CONFIG_HUGETLB_PAGE 468#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
468 mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); 469 mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
469#endif 470#endif
470 if (unlikely(mm_rss > 471 if (unlikely(mm_rss >
471 mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) 472 mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
472 tsb_grow(mm, MM_TSB_BASE, mm_rss); 473 tsb_grow(mm, MM_TSB_BASE, mm_rss);
473#ifdef CONFIG_HUGETLB_PAGE 474#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
474 mm_rss = mm->context.huge_pte_count; 475 mm_rss = mm->context.huge_pte_count;
475 if (unlikely(mm_rss > 476 if (unlikely(mm_rss >
476 mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) 477 mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
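Both fault handlers now record that a retry already happened. A sketch of the retry protocol from the handler's point of view (simplified model, not patch code; the real handlers also recheck signals and re-find the VMA):

	/* Simplified model of the fault retry protocol.  On the first
	 * pass FAULT_FLAG_ALLOW_RETRY lets the core VM drop mmap_sem
	 * while waiting on a page; on the second pass FAULT_FLAG_TRIED
	 * is set instead, so the fault cannot loop forever.
	 */
	static int fault_with_retry(struct mm_struct *mm,
				    struct vm_area_struct *vma,
				    unsigned long address)
	{
		unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
		int fault;

	retry:
		fault = handle_mm_fault(mm, vma, address, flags);
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;
			/* mmap_sem was released in __lock_page_or_retry(),
			 * so re-take it before faulting again.
			 */
			down_read(&mm->mmap_sem);
			goto retry;
		}
		return fault;
	}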
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 07e14535375c..f76f83d5ac63 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
303{ 303{
304 return NULL; 304 return NULL;
305} 305}
306
307static void context_reload(void *__data)
308{
309 struct mm_struct *mm = __data;
310
311 if (mm == current->mm)
312 load_secondary_context(mm);
313}
314
315void hugetlb_prefault_arch_hook(struct mm_struct *mm)
316{
317 struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
318
319 if (likely(tp->tsb != NULL))
320 return;
321
322 tsb_grow(mm, MM_TSB_HUGE, 0);
323 tsb_context_switch(mm);
324 smp_tsb_sync(mm);
325
326 /* On UltraSPARC-III+ and later, configure the second half of
327 * the Data-TLB for huge pages.
328 */
329 if (tlb_type == cheetah_plus) {
330 unsigned long ctx;
331
332 spin_lock(&ctx_alloc_lock);
333 ctx = mm->context.sparc64_ctx_val;
334 ctx &= ~CTX_PGSZ_MASK;
335 ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
336 ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
337
338 if (ctx != mm->context.sparc64_ctx_val) {
339 /* When changing the page size fields, we
340 * must perform a context flush so that no
341 * stale entries match. This flush must
342 * occur with the original context register
343 * settings.
344 */
345 do_flush_tlb_mm(mm);
346
347 /* Reload the context register of all processors
348 * also executing in this address space.
349 */
350 mm->context.sparc64_ctx_val = ctx;
351 on_each_cpu(context_reload, mm, 0);
352 }
353 spin_unlock(&ctx_alloc_lock);
354 }
355}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 7a9b788c6ced..9e28a118e6a4 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -276,7 +276,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long
276} 276}
277 277
278unsigned long _PAGE_ALL_SZ_BITS __read_mostly; 278unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
279unsigned long _PAGE_SZBITS __read_mostly;
280 279
281static void flush_dcache(unsigned long pfn) 280static void flush_dcache(unsigned long pfn)
282{ 281{
@@ -307,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
307 } 306 }
308} 307}
309 308
309/* mm->context.lock must be held */
310static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
311 unsigned long tsb_hash_shift, unsigned long address,
312 unsigned long tte)
313{
314 struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
315 unsigned long tag;
316
317 tsb += ((address >> tsb_hash_shift) &
318 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
319 tag = (address >> 22UL);
320 tsb_insert(tsb, tag, tte);
321}
322
310void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) 323void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
311{ 324{
325 unsigned long tsb_index, tsb_hash_shift, flags;
312 struct mm_struct *mm; 326 struct mm_struct *mm;
313 struct tsb *tsb;
314 unsigned long tag, flags;
315 unsigned long tsb_index, tsb_hash_shift;
316 pte_t pte = *ptep; 327 pte_t pte = *ptep;
317 328
318 if (tlb_type != hypervisor) { 329 if (tlb_type != hypervisor) {
@@ -329,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
329 340
330 spin_lock_irqsave(&mm->context.lock, flags); 341 spin_lock_irqsave(&mm->context.lock, flags);
331 342
332#ifdef CONFIG_HUGETLB_PAGE 343#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
333 if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) { 344 if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
334 if ((tlb_type == hypervisor && 345 if ((tlb_type == hypervisor &&
335 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || 346 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
@@ -341,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
341 } 352 }
342#endif 353#endif
343 354
344 tsb = mm->context.tsb_block[tsb_index].tsb; 355 __update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
345 tsb += ((address >> tsb_hash_shift) & 356 address, pte_val(pte));
346 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
347 tag = (address >> 22UL);
348 tsb_insert(tsb, tag, pte_val(pte));
349 357
350 spin_unlock_irqrestore(&mm->context.lock, flags); 358 spin_unlock_irqrestore(&mm->context.lock, flags);
351} 359}
@@ -2275,8 +2283,7 @@ static void __init sun4u_pgprot_init(void)
2275 __ACCESS_BITS_4U | _PAGE_E_4U); 2283 __ACCESS_BITS_4U | _PAGE_E_4U);
2276 2284
2277#ifdef CONFIG_DEBUG_PAGEALLOC 2285#ifdef CONFIG_DEBUG_PAGEALLOC
2278 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^ 2286 kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
2279 0xfffff80000000000UL;
2280#else 2287#else
2281 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ 2288 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
2282 0xfffff80000000000UL; 2289 0xfffff80000000000UL;
@@ -2287,7 +2294,6 @@ static void __init sun4u_pgprot_init(void)
2287 for (i = 1; i < 4; i++) 2294 for (i = 1; i < 4; i++)
2288 kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; 2295 kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
2289 2296
2290 _PAGE_SZBITS = _PAGE_SZBITS_4U;
2291 _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | 2297 _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
2292 _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | 2298 _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
2293 _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); 2299 _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
@@ -2324,8 +2330,7 @@ static void __init sun4v_pgprot_init(void)
2324 _PAGE_CACHE = _PAGE_CACHE_4V; 2330 _PAGE_CACHE = _PAGE_CACHE_4V;
2325 2331
2326#ifdef CONFIG_DEBUG_PAGEALLOC 2332#ifdef CONFIG_DEBUG_PAGEALLOC
2327 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ 2333 kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
2328 0xfffff80000000000UL;
2329#else 2334#else
2330 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ 2335 kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
2331 0xfffff80000000000UL; 2336 0xfffff80000000000UL;
@@ -2339,7 +2344,6 @@ static void __init sun4v_pgprot_init(void)
2339 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | 2344 pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
2340 __ACCESS_BITS_4V | _PAGE_E_4V); 2345 __ACCESS_BITS_4V | _PAGE_E_4V);
2341 2346
2342 _PAGE_SZBITS = _PAGE_SZBITS_4V;
2343 _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | 2347 _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
2344 _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | 2348 _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
2345 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | 2349 _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
@@ -2472,3 +2476,281 @@ void __flush_tlb_all(void)
2472 __asm__ __volatile__("wrpr %0, 0, %%pstate" 2476 __asm__ __volatile__("wrpr %0, 0, %%pstate"
2473 : : "r" (pstate)); 2477 : : "r" (pstate));
2474} 2478}
2479
2480static pte_t *get_from_cache(struct mm_struct *mm)
2481{
2482 struct page *page;
2483 pte_t *ret;
2484
2485 spin_lock(&mm->page_table_lock);
2486 page = mm->context.pgtable_page;
2487 ret = NULL;
2488 if (page) {
2489 void *p = page_address(page);
2490
2491 mm->context.pgtable_page = NULL;
2492
2493 ret = (pte_t *) (p + (PAGE_SIZE / 2));
2494 }
2495 spin_unlock(&mm->page_table_lock);
2496
2497 return ret;
2498}
2499
2500static struct page *__alloc_for_cache(struct mm_struct *mm)
2501{
2502 struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
2503 __GFP_REPEAT | __GFP_ZERO);
2504
2505 if (page) {
2506 spin_lock(&mm->page_table_lock);
2507 if (!mm->context.pgtable_page) {
2508 atomic_set(&page->_count, 2);
2509 mm->context.pgtable_page = page;
2510 }
2511 spin_unlock(&mm->page_table_lock);
2512 }
2513 return page;
2514}
2515
2516pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
2517 unsigned long address)
2518{
2519 struct page *page;
2520 pte_t *pte;
2521
2522 pte = get_from_cache(mm);
2523 if (pte)
2524 return pte;
2525
2526 page = __alloc_for_cache(mm);
2527 if (page)
2528 pte = (pte_t *) page_address(page);
2529
2530 return pte;
2531}
2532
2533pgtable_t pte_alloc_one(struct mm_struct *mm,
2534 unsigned long address)
2535{
2536 struct page *page;
2537 pte_t *pte;
2538
2539 pte = get_from_cache(mm);
2540 if (pte)
2541 return pte;
2542
2543 page = __alloc_for_cache(mm);
2544 if (page) {
2545 pgtable_page_ctor(page);
2546 pte = (pte_t *) page_address(page);
2547 }
2548
2549 return pte;
2550}
2551
2552void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
2553{
2554 struct page *page = virt_to_page(pte);
2555 if (put_page_testzero(page))
2556 free_hot_cold_page(page, 0);
2557}
2558
2559static void __pte_free(pgtable_t pte)
2560{
2561 struct page *page = virt_to_page(pte);
2562 if (put_page_testzero(page)) {
2563 pgtable_page_dtor(page);
2564 free_hot_cold_page(page, 0);
2565 }
2566}
2567
2568void pte_free(struct mm_struct *mm, pgtable_t pte)
2569{
2570 __pte_free(pte);
2571}
2572
2573void pgtable_free(void *table, bool is_page)
2574{
2575 if (is_page)
2576 __pte_free(table);
2577 else
2578 kmem_cache_free(pgtable_cache, table);
2579}
2580
2581#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2582static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
2583{
2584 if (pgprot_val(pgprot) & _PAGE_VALID)
2585 pmd_val(pmd) |= PMD_HUGE_PRESENT;
2586 if (tlb_type == hypervisor) {
2587 if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
2588 pmd_val(pmd) |= PMD_HUGE_WRITE;
2589 if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
2590 pmd_val(pmd) |= PMD_HUGE_EXEC;
2591
2592 if (!for_modify) {
2593 if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
2594 pmd_val(pmd) |= PMD_HUGE_ACCESSED;
2595 if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
2596 pmd_val(pmd) |= PMD_HUGE_DIRTY;
2597 }
2598 } else {
2599 if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
2600 pmd_val(pmd) |= PMD_HUGE_WRITE;
2601 if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
2602 pmd_val(pmd) |= PMD_HUGE_EXEC;
2603
2604 if (!for_modify) {
2605 if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
2606 pmd_val(pmd) |= PMD_HUGE_ACCESSED;
2607 if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
2608 pmd_val(pmd) |= PMD_HUGE_DIRTY;
2609 }
2610 }
2611
2612 return pmd;
2613}
2614
2615pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
2616{
2617 pmd_t pmd;
2618
2619 pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
2620 pmd_val(pmd) |= PMD_ISHUGE;
2621 pmd = pmd_set_protbits(pmd, pgprot, false);
2622 return pmd;
2623}
2624
2625pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
2626{
2627 pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
2628 PMD_HUGE_WRITE |
2629 PMD_HUGE_EXEC);
2630 pmd = pmd_set_protbits(pmd, newprot, true);
2631 return pmd;
2632}
2633
2634pgprot_t pmd_pgprot(pmd_t entry)
2635{
2636 unsigned long pte = 0;
2637
2638 if (pmd_val(entry) & PMD_HUGE_PRESENT)
2639 pte |= _PAGE_VALID;
2640
2641 if (tlb_type == hypervisor) {
2642 if (pmd_val(entry) & PMD_HUGE_PRESENT)
2643 pte |= _PAGE_PRESENT_4V;
2644 if (pmd_val(entry) & PMD_HUGE_EXEC)
2645 pte |= _PAGE_EXEC_4V;
2646 if (pmd_val(entry) & PMD_HUGE_WRITE)
2647 pte |= _PAGE_W_4V;
2648 if (pmd_val(entry) & PMD_HUGE_ACCESSED)
2649 pte |= _PAGE_ACCESSED_4V;
2650 if (pmd_val(entry) & PMD_HUGE_DIRTY)
2651 pte |= _PAGE_MODIFIED_4V;
2652 pte |= _PAGE_CP_4V|_PAGE_CV_4V;
2653 } else {
2654 if (pmd_val(entry) & PMD_HUGE_PRESENT)
2655 pte |= _PAGE_PRESENT_4U;
2656 if (pmd_val(entry) & PMD_HUGE_EXEC)
2657 pte |= _PAGE_EXEC_4U;
2658 if (pmd_val(entry) & PMD_HUGE_WRITE)
2659 pte |= _PAGE_W_4U;
2660 if (pmd_val(entry) & PMD_HUGE_ACCESSED)
2661 pte |= _PAGE_ACCESSED_4U;
2662 if (pmd_val(entry) & PMD_HUGE_DIRTY)
2663 pte |= _PAGE_MODIFIED_4U;
2664 pte |= _PAGE_CP_4U|_PAGE_CV_4U;
2665 }
2666
2667 return __pgprot(pte);
2668}
2669
2670void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
2671 pmd_t *pmd)
2672{
2673 unsigned long pte, flags;
2674 struct mm_struct *mm;
2675 pmd_t entry = *pmd;
2676 pgprot_t prot;
2677
2678 if (!pmd_large(entry) || !pmd_young(entry))
2679 return;
2680
2681 pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
2682 pte <<= PMD_PADDR_SHIFT;
2683 pte |= _PAGE_VALID;
2684
2685 prot = pmd_pgprot(entry);
2686
2687 if (tlb_type == hypervisor)
2688 pgprot_val(prot) |= _PAGE_SZHUGE_4V;
2689 else
2690 pgprot_val(prot) |= _PAGE_SZHUGE_4U;
2691
2692 pte |= pgprot_val(prot);
2693
2694 mm = vma->vm_mm;
2695
2696 spin_lock_irqsave(&mm->context.lock, flags);
2697
2698 if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
2699 __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
2700 addr, pte);
2701
2702 spin_unlock_irqrestore(&mm->context.lock, flags);
2703}
2704#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2705
2706#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
2707static void context_reload(void *__data)
2708{
2709 struct mm_struct *mm = __data;
2710
2711 if (mm == current->mm)
2712 load_secondary_context(mm);
2713}
2714
2715void hugetlb_setup(struct mm_struct *mm)
2716{
2717 struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
2718
2719 if (likely(tp->tsb != NULL))
2720 return;
2721
2722 tsb_grow(mm, MM_TSB_HUGE, 0);
2723 tsb_context_switch(mm);
2724 smp_tsb_sync(mm);
2725
2726 /* On UltraSPARC-III+ and later, configure the second half of
2727 * the Data-TLB for huge pages.
2728 */
2729 if (tlb_type == cheetah_plus) {
2730 unsigned long ctx;
2731
2732 spin_lock(&ctx_alloc_lock);
2733 ctx = mm->context.sparc64_ctx_val;
2734 ctx &= ~CTX_PGSZ_MASK;
2735 ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
2736 ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
2737
2738 if (ctx != mm->context.sparc64_ctx_val) {
2739 /* When changing the page size fields, we
2740 * must perform a context flush so that no
2741 * stale entries match. This flush must
2742 * occur with the original context register
2743 * settings.
2744 */
2745 do_flush_tlb_mm(mm);
2746
2747 /* Reload the context register of all processors
2748 * also executing in this address space.
2749 */
2750 mm->context.sparc64_ctx_val = ctx;
2751 on_each_cpu(context_reload, mm, 0);
2752 }
2753 spin_unlock(&ctx_alloc_lock);
2754 }
2755}
2756#endif
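The new allocator carves two PTE tables out of each page: with PTRS_PER_PTE dropped to 1 << (PAGE_SHIFT - 4), a table is 4K, half a page. One half is handed out immediately and the other is parked in mm->context.pgtable_page, with the page's refcount pre-set to 2 so that each half holds one reference. A sketch of the release side (hypothetical helper mirroring pte_free_kernel() and __pte_free() above):

	/* Hypothetical helper showing why __alloc_for_cache() sets
	 * page->_count to 2: each half-page PTE table owns one
	 * reference, and the backing page returns to the page
	 * allocator only when the second half is freed.
	 */
	static void release_pte_table_half(pte_t *pte)
	{
		struct page *page = virt_to_page(pte);

		if (put_page_testzero(page))		/* last half gone? */
			free_hot_cold_page(page, 0);	/* free whole page */
	}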
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b1f279cd00bf..3e8fec391fe0 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
43 put_cpu_var(tlb_batch); 43 put_cpu_var(tlb_batch);
44} 44}
45 45
46void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, 46static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
47 pte_t *ptep, pte_t orig, int fullmm) 47 bool exec)
48{ 48{
49 struct tlb_batch *tb = &get_cpu_var(tlb_batch); 49 struct tlb_batch *tb = &get_cpu_var(tlb_batch);
50 unsigned long nr; 50 unsigned long nr;
51 51
52 vaddr &= PAGE_MASK; 52 vaddr &= PAGE_MASK;
53 if (pte_exec(orig)) 53 if (exec)
54 vaddr |= 0x1UL; 54 vaddr |= 0x1UL;
55 55
56 nr = tb->tlb_nr;
57
58 if (unlikely(nr != 0 && mm != tb->mm)) {
59 flush_tlb_pending();
60 nr = 0;
61 }
62
63 if (nr == 0)
64 tb->mm = mm;
65
66 tb->vaddrs[nr] = vaddr;
67 tb->tlb_nr = ++nr;
68 if (nr >= TLB_BATCH_NR)
69 flush_tlb_pending();
70
71 put_cpu_var(tlb_batch);
72}
73
74void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
75 pte_t *ptep, pte_t orig, int fullmm)
76{
56 if (tlb_type != hypervisor && 77 if (tlb_type != hypervisor &&
57 pte_dirty(orig)) { 78 pte_dirty(orig)) {
58 unsigned long paddr, pfn = pte_pfn(orig); 79 unsigned long paddr, pfn = pte_pfn(orig);
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
77 } 98 }
78 99
79no_cache_flush: 100no_cache_flush:
101 if (!fullmm)
102 tlb_batch_add_one(mm, vaddr, pte_exec(orig));
103}
104
105#ifdef CONFIG_TRANSPARENT_HUGEPAGE
106static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
107 pmd_t pmd, bool exec)
108{
109 unsigned long end;
110 pte_t *pte;
111
112 pte = pte_offset_map(&pmd, vaddr);
113 end = vaddr + HPAGE_SIZE;
114 while (vaddr < end) {
115 if (pte_val(*pte) & _PAGE_VALID)
116 tlb_batch_add_one(mm, vaddr, exec);
117 pte++;
118 vaddr += PAGE_SIZE;
119 }
120 pte_unmap(pte);
121}
80 122
81 if (fullmm) { 123void set_pmd_at(struct mm_struct *mm, unsigned long addr,
82 put_cpu_var(tlb_batch); 124 pmd_t *pmdp, pmd_t pmd)
125{
126 pmd_t orig = *pmdp;
127
128 *pmdp = pmd;
129
130 if (mm == &init_mm)
83 return; 131 return;
132
133 if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
134 if (pmd_val(pmd) & PMD_ISHUGE)
135 mm->context.huge_pte_count++;
136 else
137 mm->context.huge_pte_count--;
138 if (mm->context.huge_pte_count == 1)
139 hugetlb_setup(mm);
84 } 140 }
85 141
86 nr = tb->tlb_nr; 142 if (!pmd_none(orig)) {
143 bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
87 144
88 if (unlikely(nr != 0 && mm != tb->mm)) { 145 addr &= HPAGE_MASK;
89 flush_tlb_pending(); 146 if (pmd_val(orig) & PMD_ISHUGE)
90 nr = 0; 147 tlb_batch_add_one(mm, addr, exec);
148 else
149 tlb_batch_pmd_scan(mm, addr, orig, exec);
91 } 150 }
151}
92 152
93 if (nr == 0) 153void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
94 tb->mm = mm; 154{
155 struct list_head *lh = (struct list_head *) pgtable;
95 156
96 tb->vaddrs[nr] = vaddr; 157 assert_spin_locked(&mm->page_table_lock);
97 tb->tlb_nr = ++nr;
98 if (nr >= TLB_BATCH_NR)
99 flush_tlb_pending();
100 158
101 put_cpu_var(tlb_batch); 159 /* FIFO */
160 if (!mm->pmd_huge_pte)
161 INIT_LIST_HEAD(lh);
162 else
163 list_add(lh, (struct list_head *) mm->pmd_huge_pte);
164 mm->pmd_huge_pte = pgtable;
165}
166
167pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
168{
169 struct list_head *lh;
170 pgtable_t pgtable;
171
172 assert_spin_locked(&mm->page_table_lock);
173
174 /* FIFO */
175 pgtable = mm->pmd_huge_pte;
176 lh = (struct list_head *) pgtable;
177 if (list_empty(lh))
178 mm->pmd_huge_pte = NULL;
179 else {
180 mm->pmd_huge_pte = (pgtable_t) lh->next;
181 list_del(lh);
182 }
183 pte_val(pgtable[0]) = 0;
184 pte_val(pgtable[1]) = 0;
185
186 return pgtable;
102} 187}
188#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
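Because pgtable_t is now a pte_t * to a half-page table, pgtable_trans_huge_deposit() can overlay a struct list_head on the table itself and chain spare tables off mm->pmd_huge_pte, so splitting a huge PMD never has to allocate; clearing pgtable[0] and pgtable[1] on withdraw erases exactly that overlay. A usage sketch (hypothetical caller; in reality the generic THP code invokes these hooks under mm->page_table_lock):

	/* Hypothetical caller pairing the two hooks: stash a pte table
	 * when a huge pmd is installed, pull one back when it is split.
	 * The lock matches the assert_spin_locked() checks above.
	 */
	static pgtable_t stash_and_reclaim(struct mm_struct *mm, pgtable_t pgtable)
	{
		spin_lock(&mm->page_table_lock);
		pgtable_trans_huge_deposit(mm, pgtable);	/* on collapse */
		pgtable = pgtable_trans_huge_withdraw(mm);	/* on split */
		spin_unlock(&mm->page_table_lock);
		return pgtable;
	}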
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index c52add79b83d..7f6474347491 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb)
78 base = __pa(base); 78 base = __pa(base);
79 __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); 79 __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
80 80
81#ifdef CONFIG_HUGETLB_PAGE 81#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
82 if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { 82 if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
83 base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; 83 base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
84 nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; 84 nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
@@ -90,29 +90,12 @@ void flush_tsb_user(struct tlb_batch *tb)
90 spin_unlock_irqrestore(&mm->context.lock, flags); 90 spin_unlock_irqrestore(&mm->context.lock, flags);
91} 91}
92 92
93#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
94#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K 93#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
95#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K 94#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
96#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
97#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
98#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
99#else
100#error Broken base page size setting...
101#endif
102 95
-#ifdef CONFIG_HUGETLB_PAGE
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_64K
-#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_64K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_512K
-#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_512K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
 #define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
-#else
-#error Broken huge page size setting...
-#endif
 #endif
 
 static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
@@ -207,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 	case MM_TSB_BASE:
 		hp->pgsz_idx = HV_PGSZ_IDX_BASE;
 		break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	case MM_TSB_HUGE:
 		hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
 		break;
@@ -222,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 	case MM_TSB_BASE:
 		hp->pgsz_mask = HV_PGSZ_MASK_BASE;
 		break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	case MM_TSB_HUGE:
 		hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
 		break;
@@ -444,7 +427,7 @@ retry_tsb_alloc:
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	unsigned long huge_pte_count;
 #endif
 	unsigned int i;
@@ -453,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 	mm->context.sparc64_ctx_val = 0UL;
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	/* We reset it to zero because the fork() page copying
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
@@ -462,6 +445,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	mm->context.huge_pte_count = 0;
 #endif
 
+	mm->context.pgtable_page = NULL;
+
 	/* copy_mm() copies over the parent's mm_struct before calling
 	 * us, so we need to zero out the TSB pointer or else tsb_grow()
 	 * will be confused and think there is an older TSB to free up.
@@ -474,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (unlikely(huge_pte_count))
 		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
 #endif
@@ -500,10 +485,17 @@ static void tsb_destroy_one(struct tsb_config *tp)
 void destroy_context(struct mm_struct *mm)
 {
 	unsigned long flags, i;
+	struct page *page;
 
 	for (i = 0; i < MM_NUM_TSBS; i++)
 		tsb_destroy_one(&mm->context.tsb_block[i]);
 
+	page = mm->context.pgtable_page;
+	if (page && put_page_testzero(page)) {
+		pgtable_page_dtor(page);
+		free_hot_cold_page(page, 0);
+	}
+
 	spin_lock_irqsave(&ctx_alloc_lock, flags);
 
 	if (CTX_VALID(mm->context)) {
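The destroy_context() hunk above drops this mm's reference on the shared huge-PMD page table page and tears it down only for the last user. A minimal userspace model of that put_page_testzero() pattern, with C11 atomics standing in for the kernel page refcount (all names here are invented for illustration):

/* Model of the last-reference-frees pattern; not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page_model { atomic_int refcount; };

/* Drop one reference; true only for the caller that reached zero. */
static bool put_page_testzero_model(struct page_model *p)
{
	return atomic_fetch_sub(&p->refcount, 1) == 1;
}

int main(void)
{
	struct page_model pgtable = { 2 };	/* shared by two mms */

	if (put_page_testzero_model(&pgtable))
		puts("freed by first mm");	/* not reached */
	if (put_page_testzero_model(&pgtable))
		puts("freed by second mm");	/* last user frees */
	return 0;
}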
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index c9a3c1fe7297..dc46490adca0 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -7,12 +7,15 @@ config TILE
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KVM if !TILEGX
 	select GENERIC_FIND_FIRST_BIT
+	select SYSCTL_EXCEPTION_TRACE
 	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
+	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_SYSCALL_WRAPPERS if TILEGX
 	select SYS_HYPERVISOR
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index b2042380a5aa..0f885af2b621 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,6 +106,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #ifdef CONFIG_HUGETLB_SUPER_PAGES
 static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 				       struct page *page, int writable)
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 758b6038c2b7..3cfa98bf9125 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -36,19 +36,14 @@ static void sim_notify_exec(const char *binary_name)
 	} while (c);
 }
 
-static int notify_exec(void)
+static int notify_exec(struct mm_struct *mm)
 {
 	int retval = 0;  /* failure */
-	struct vm_area_struct *vma = current->mm->mmap;
-	while (vma) {
-		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
-			break;
-		vma = vma->vm_next;
-	}
-	if (vma) {
+
+	if (mm->exe_file) {
 		char *buf = (char *) __get_free_page(GFP_KERNEL);
 		if (buf) {
-			char *path = d_path(&vma->vm_file->f_path,
-					    buf, PAGE_SIZE);
+			char *path = d_path(&mm->exe_file->f_path,
+					    buf, PAGE_SIZE);
 			if (!IS_ERR(path)) {
 				sim_notify_exec(path);
@@ -106,16 +101,16 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	unsigned long vdso_base;
 	int retval = 0;
 
+	down_write(&mm->mmap_sem);
+
 	/*
 	 * Notify the simulator that an exec just occurred.
 	 * If we can't find the filename of the mapping, just use
 	 * whatever was passed as the linux_binprm filename.
 	 */
-	if (!notify_exec())
+	if (!notify_exec(mm))
 		sim_notify_exec(bprm->filename);
 
-	down_write(&mm->mmap_sem);
-
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
 	 */
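The notify_exec() rewrite above replaces a linear scan for a VM_EXECUTABLE mapping with the mm->exe_file pointer the kernel now caches at exec time. A rough userspace model of the lookup, under the assumption that the cached file is all the simulator needs (all names invented for illustration):

/* Userspace model of the mm->exe_file lookup; all names invented. */
#include <stdio.h>

struct file_model { const char *path; };
struct mm_model { struct file_model *exe_file; };

/* Returns nonzero on success, like the rewritten notify_exec(). */
static int notify_exec_model(struct mm_model *mm)
{
	if (!mm->exe_file)
		return 0;	/* caller falls back to bprm->filename */
	printf("exec: %s\n", mm->exe_file->path);
	return 1;
}

int main(void)
{
	struct file_model f = { "/bin/true" };
	struct mm_model mm = { &f };

	return notify_exec_model(&mm) ? 0 : 1;
}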
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 84ce7abbf5af..fe811fa5f1b9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -454,6 +454,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
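This FAULT_FLAG_TRIED hunk repeats across the tile, um, x86 and xtensa fault handlers in this diff: a first attempt may return VM_FAULT_RETRY, after which the handler clears ALLOW_RETRY and marks the retry so the second attempt waits synchronously instead of looping. A self-contained sketch of that protocol, with illustrative flag values:

/* Sketch of the fault-retry protocol; flag values are illustrative. */
#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY	0x01
#define FAULT_FLAG_TRIED	0x02
#define VM_FAULT_RETRY		0x04

/* A first, ALLOW_RETRY attempt may bail out and ask to be retried. */
static int handle_fault_model(unsigned int flags)
{
	return (flags & FAULT_FLAG_ALLOW_RETRY) ? VM_FAULT_RETRY : 0;
}

int main(void)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;

retry:
	if (handle_fault_model(flags) & VM_FAULT_RETRY) {
		flags &= ~FAULT_FLAG_ALLOW_RETRY;	/* at most one retry */
		flags |= FAULT_FLAG_TRIED;		/* second pass blocks */
		goto retry;
	}
	puts("fault handled");
	return 0;
}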
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index cb837c223922..648121b037d5 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
 	bool
 	default y
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IO
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35a..0f00e9c82080 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -89,6 +89,7 @@ good_area:
 		current->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		goto retry;
 	}
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index b6f0458c3143..b008586dad75 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -380,7 +380,7 @@ int vectors_user_mapping(void)
 	return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
 				       VM_READ | VM_EXEC |
 				       VM_MAYREAD | VM_MAYEXEC |
-				       VM_RESERVED,
+				       VM_DONTEXPAND | VM_DONTDUMP,
 				       NULL);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b72777ff32a9..1ae94bcae5d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -10,6 +10,7 @@ config X86_32
 	def_bool y
 	depends on !64BIT
 	select CLKSRC_I8253
+	select HAVE_UID16
 
 config X86_64
 	def_bool y
@@ -46,6 +47,7 @@ config X86
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
@@ -65,6 +67,7 @@ config X86
 	select HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_DEBUG_KMEMLEAK
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
@@ -85,6 +88,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if X86_64
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
@@ -2168,6 +2172,7 @@ config IA32_EMULATION
 	bool "IA32 Emulation"
 	depends on X86_64
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	---help---
 	  Include code to run legacy 32-bit programs under a
 	  64-bit kernel. You should likely turn this on, unless you're
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 250b8774c158..b6c3b821acf6 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
-	int c, old, dec;
-	c = atomic_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 /**
  * atomic_inc_short - increment of a short integer
  * @v: pointer to type int
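atomic_dec_if_positive() is deleted from the x86 header here, presumably in favor of a generic copy elsewhere in the same series; the compare-and-swap retry loop it implements is worth seeing on its own. A userspace model using C11 atomics, matching the removed helper's observable behavior (names are illustrative):

/* Userspace model of the removed helper; same observable behavior. */
#include <stdatomic.h>
#include <stdio.h>

static int dec_if_positive(atomic_int *v)
{
	int c = atomic_load(v);

	for (;;) {
		int dec = c - 1;

		if (dec < 0)
			return dec;	/* would go negative: leave *v alone */
		/* on CAS failure, c is reloaded with the current value */
		if (atomic_compare_exchange_weak(v, &c, dec))
			return dec;
	}
}

int main(void)
{
	atomic_int v = 1;
	int a = dec_if_positive(&v);	/* 1 -> 0, returns 0 */
	int b = dec_if_positive(&v);	/* stays 0, returns -1 */

	printf("%d %d\n", a, b);
	return 0;
}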
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9acc132d..bdd35dbd0605 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_X86_HUGETLB_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fc9948465293..a1f780d45f76 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 
 static inline int pmd_large(pmd_t pte)
 {
-	return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-		(_PAGE_PSE | _PAGE_PRESENT);
+	return pmd_flags(pte) & _PAGE_PSE;
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte)
 
 static inline int pmd_present(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_PRESENT;
+	/*
+	 * Checking for _PAGE_PSE is needed too because
+	 * split_huge_page will temporarily clear the present bit (but
+	 * the _PAGE_PSE flag will remain set at all times while the
+	 * _PAGE_PRESENT bit is clear).
+	 */
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
 static inline int pmd_none(pmd_t pmd)
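The widened pmd_present() above is the subtle part of this hunk: while split_huge_page() has _PAGE_PRESENT temporarily cleared, _PAGE_PSE stays set, so testing only the present bit would make a splitting huge pmd look as if it had vanished. A toy model of the check, with illustrative bit values rather than the real x86 encodings:

/* Toy model; bit values are illustrative, not the x86 encodings. */
#include <assert.h>

#define _PAGE_PRESENT	0x001
#define _PAGE_PSE	0x080
#define _PAGE_PROTNONE	0x100

static int pmd_present_model(unsigned long flags)
{
	return flags & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}

int main(void)
{
	unsigned long huge = _PAGE_PRESENT | _PAGE_PSE;

	huge &= ~_PAGE_PRESENT;			/* mid split_huge_page() */
	assert(pmd_present_model(huge));	/* still counts as present */
	return 0;
}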
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0c92113c4cb6..8faa215a503e 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -71,6 +71,7 @@ do { \
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 8251be02301e..47356f9df82e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
 #define update_mmu_cache(vma, address, ptep)	do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd)	do { } while (0)
 
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b230e7d7..8e13ecb41bee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1220,6 +1220,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e48512425..937bff5cdaa7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *svma;
 	unsigned long saddr;
 	pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		return (pte_t *)pmd_alloc(mm, pud, addr);
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
 
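The vma_prio_tree_foreach() to vma_interval_tree_foreach() switch drops the caller-supplied iterator because the interval tree keeps its cursor in the nodes themselves. A toy stabbing query over a sorted array, standing in for the augmented rbtree the kernel actually walks (an assumed simplification, not the real data structure):

/* Toy stabbing query; a sorted array stands in for the rbtree. */
#include <stdio.h>

struct vma_model { unsigned long first, last; };	/* pgoff range */

static const struct vma_model tree[] = { {0, 4}, {3, 9}, {12, 20} };

int main(void)
{
	unsigned long idx = 3;	/* like the idx..idx query above */
	unsigned long i;

	/* no external iterator struct: state lives in the loop itself */
	for (i = 0; i < sizeof(tree) / sizeof(tree[0]); i++)
		if (tree[i].first <= idx && idx <= tree[i].last)
			printf("overlaps [%lu,%lu]\n",
			       tree[i].first, tree[i].last);
	return 0;
}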
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d68ef6d2266..0eb572eda406 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 }
 
 /*
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
  * copied through copy_page_range().
  *
  * If the vma has a linear pfn mapping for the entire range, we get the prot
  * from pte and reserve the entire vma range with single reserve_pfn_range call.
  */
-int track_pfn_vma_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *vma)
 {
 	resource_size_t paddr;
 	unsigned long prot;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (is_linear_pfn_mapping(vma)) {
+	if (vma->vm_flags & VM_PAT) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
 		 * starting address and protection from pte.
@@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 }
 
 /*
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- *
  * prot is passed in as a parameter for the new mapping. If the vma has a
  * linear pfn mapping for the entire range reserve the entire vma range with
  * single reserve_pfn_range call.
  */
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-			unsigned long pfn, unsigned long size)
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+		    unsigned long pfn, unsigned long addr, unsigned long size)
 {
+	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
-	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* reserve the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		return reserve_pfn_range(paddr, vma_size, prot, 0);
+	/* reserve the whole chunk starting from paddr */
+	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+		int ret;
+
+		ret = reserve_pfn_range(paddr, size, prot, 0);
+		if (!ret)
+			vma->vm_flags |= VM_PAT;
+		return ret;
 	}
 
 	if (!pat_enabled)
 		return 0;
 
-	/* for vm_insert_pfn and friends, we set prot based on lookup */
-	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	/*
+	 * For anything smaller than the vma size we set prot based on the
+	 * lookup.
+	 */
+	flags = lookup_memtype(paddr);
+
+	/* Check memtype for the remaining pages */
+	while (size > PAGE_SIZE) {
+		size -= PAGE_SIZE;
+		paddr += PAGE_SIZE;
+		if (flags != lookup_memtype(paddr))
+			return -EINVAL;
+	}
+
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
+	return 0;
+}
+
+int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+		     unsigned long pfn)
+{
+	unsigned long flags;
+
+	if (!pat_enabled)
+		return 0;
+
+	/* Set prot based on lookup */
+	flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
 	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 			 flags);
 
@@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 }
 
 /*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
  */
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size)
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+		 unsigned long size)
 {
 	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
+	unsigned long prot;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* free the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		free_pfn_range(paddr, vma_size);
+	if (!(vma->vm_flags & VM_PAT))
 		return;
+
+	/* free the chunk starting from pfn or the whole chunk */
+	paddr = (resource_size_t)pfn << PAGE_SHIFT;
+	if (!paddr && !size) {
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		size = vma->vm_end - vma->vm_start;
 	}
+	free_pfn_range(paddr, size);
+	vma->vm_flags &= ~VM_PAT;
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
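The reworked tracking above keys everything off a VM_PAT vma flag: a whole-vma remap reserves the range once and sets the flag, and untrack_pfn() releases only flagged vmas. A compact userspace model of that bookkeeping (flag value and all names are illustrative):

/* Userspace model of the VM_PAT bookkeeping; names illustrative. */
#include <stdio.h>

#define VM_PAT	0x1

struct vma_model { unsigned long start, end, flags; };

/* Whole-vma remaps reserve once and tag the vma, as above. */
static int track_pfn_remap_model(struct vma_model *vma,
				 unsigned long addr, unsigned long size)
{
	if (addr == vma->start && size == vma->end - vma->start) {
		vma->flags |= VM_PAT;
		return 1;
	}
	return 0;	/* partial range: fall back to per-page lookups */
}

static void untrack_pfn_model(struct vma_model *vma)
{
	if (!(vma->flags & VM_PAT))
		return;		/* nothing was reserved for this vma */
	vma->flags &= ~VM_PAT;
	puts("released whole-vma reservation");
}

int main(void)
{
	struct vma_model vma = { 0x1000, 0x5000, 0 };

	track_pfn_remap_model(&vma, 0x1000, 0x4000);
	untrack_pfn_model(&vma);
	return 0;
}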
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 8acaddd0fb21..415f6c4ced36 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -12,7 +12,7 @@
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
 
@@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node)
 	return ret;
 }
 
-/* Update 'subtree_max_end' for a node, based on node and its children */
-static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
+static u64 compute_subtree_max_end(struct memtype *data)
 {
-	struct memtype *data;
-	u64 max_end, child_max_end;
-
-	if (!node)
-		return;
+	u64 max_end = data->end, child_max_end;
 
-	data = container_of(node, struct memtype, rb);
-	max_end = data->end;
-
-	child_max_end = get_subtree_max_end(node->rb_right);
+	child_max_end = get_subtree_max_end(data->rb.rb_right);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	child_max_end = get_subtree_max_end(node->rb_left);
+	child_max_end = get_subtree_max_end(data->rb.rb_left);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	data->subtree_max_end = max_end;
+	return max_end;
 }
 
+RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
+		     u64, subtree_max_end, compute_subtree_max_end)
+
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 				u64 start, u64 end)
@@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 		struct memtype *data = container_of(*node, struct memtype, rb);
 
 		parent = *node;
+		if (data->subtree_max_end < newdata->end)
+			data->subtree_max_end = newdata->end;
 		if (newdata->start <= data->start)
 			node = &((*node)->rb_left);
 		else if (newdata->start > data->start)
 			node = &((*node)->rb_right);
 	}
 
+	newdata->subtree_max_end = newdata->end;
 	rb_link_node(&newdata->rb, parent, node);
-	rb_insert_color(&newdata->rb, root);
-	rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
+	rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
 }
 
 int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
 
 struct memtype *rbt_memtype_erase(u64 start, u64 end)
 {
-	struct rb_node *deepest;
 	struct memtype *data;
 
 	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
 	if (!data)
 		goto out;
 
-	deepest = rb_augment_erase_begin(&data->rb);
-	rb_erase(&data->rb, &memtype_rbroot);
-	rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
+	rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
 out:
 	return data;
 }
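RB_DECLARE_CALLBACKS() generates the propagate/copy/rotate callbacks that keep subtree_max_end correct as the tree rebalances; the invariant itself is simply that each node caches the largest interval end anywhere below it. A minimal model of that computation, with plain pointers standing in for the kernel rbtree machinery (illustrative, not the kernel API):

/* Minimal model of the augmented-rbtree invariant; plain pointers. */
#include <stdio.h>

struct node {
	unsigned long long end, subtree_max_end;
	struct node *left, *right;
};

/* Same shape as compute_subtree_max_end() above. */
static unsigned long long subtree_max_end(const struct node *n)
{
	unsigned long long max = n->end;

	if (n->left && n->left->subtree_max_end > max)
		max = n->left->subtree_max_end;
	if (n->right && n->right->subtree_max_end > max)
		max = n->right->subtree_max_end;
	return max;
}

int main(void)
{
	struct node l = { 10, 10, NULL, NULL };
	struct node r = { 30, 30, NULL, NULL };
	struct node root = { 20, 0, &l, &r };

	root.subtree_max_end = subtree_max_end(&root);
	printf("subtree_max_end = %llu\n", root.subtree_max_end);	/* 30 */
	return 0;
}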
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824cc2b3..fd28d86fe3d2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
-	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
-				(VM_PFNMAP | VM_RESERVED | VM_IO)));
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 5a74c53bc69c..2c2f710ed1dc 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -126,6 +126,7 @@ good_area:
 		current->min_flt++;
 	if (fault & VM_FAULT_RETRY) {
 		flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		flags |= FAULT_FLAG_TRIED;
 
 		/* No need to up_read(&mm->mmap_sem) as we would
 		 * have already released it in __lock_page_or_retry