Diffstat:
 arch/tile/Kconfig                   |   6
 arch/tile/include/asm/hugetlb.h     |   2
 arch/tile/include/asm/page.h        |  34
 arch/tile/include/asm/pgalloc.h     |   7
 arch/tile/include/asm/pgtable.h     |  31
 arch/tile/include/asm/pgtable_32.h  |   8
 arch/tile/include/asm/stack.h       |   3
 arch/tile/include/asm/thread_info.h |   1
 arch/tile/kernel/intvec_32.S        |  16
 arch/tile/kernel/machine_kexec.c    |   7
 arch/tile/kernel/pci-dma.c          |  38
 arch/tile/kernel/process.c          |   2
 arch/tile/kernel/setup.c            |  20
 arch/tile/lib/memcpy_tile64.c       |   4
 arch/tile/mm/homecache.c            |   2
 arch/tile/mm/init.c                 |  18
 arch/tile/mm/migrate_32.S           |   1
 arch/tile/mm/pgtable.c              | 170
 18 files changed, 235 insertions, 135 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index eed0fc5dfe67..f3b78701c219 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -202,12 +202,6 @@ config NODES_SHIFT
 	  By default, 2, i.e. 2^2 == 4 DDR2 controllers.
 	  In a system with more controllers, this value should be raised.
 
-# Need 16MB areas to enable hugetlb
-# See build-time check in arch/tile/mm/init.c.
-config FORCE_MAX_ZONEORDER
-	int
-	default 9
-
 choice
 	depends on !TILEGX
 	prompt "Memory split" if EXPERT
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0521c277bbde..d396d1805163 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -54,7 +54,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, pte_t pte)
 {
-	set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER);
+	set_pte(ptep, pte);
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7979a45430d3..3eb53525bf9d 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -16,10 +16,11 @@
 #define _ASM_TILE_PAGE_H
 
 #include <linux/const.h>
+#include <hv/pagesize.h>
 
 /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT	16
-#define HPAGE_SHIFT	24
+#define PAGE_SHIFT	HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
 
 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
@@ -29,25 +30,18 @@
 
 #ifdef __KERNEL__
 
-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
 /*
- * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx
- * definitions in <hv/hypervisor.h>.  We validate this at build time
- * here, and again at runtime during early boot.  We provide a
- * separate definition since userspace doesn't have <hv/hypervisor.h>.
- *
- * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since
- * they are the same on i386 but not TILE.
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types.  See <linux/mmzone.h>.
  */
-#if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT
-# error Small page size mismatch in Linux
-#endif
-#if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT
-# error Huge page size mismatch in Linux
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
 #endif
 
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
@@ -81,12 +75,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
  * Hypervisor page tables are made of the same basic structure.
  */
 
-typedef __u64 pteval_t;
-typedef __u64 pmdval_t;
-typedef __u64 pudval_t;
-typedef __u64 pgdval_t;
-typedef __u64 pgprotval_t;
-
 typedef HV_PTE pte_t;
 typedef HV_PTE pgd_t;
 typedef HV_PTE pgprot_t;
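
For context on the CONFIG_FORCE_MAX_ZONEORDER default introduced above: the buddy allocator's largest block is 2^(MAX_ORDER-1) small pages, so HPAGE_SHIFT - PAGE_SHIFT + 1 is the smallest order that lets a single block span an entire huge page. A minimal user-space sketch of the arithmetic, using the 4KB small / 16MB huge geometry this patch enables (illustrative values, not kernel code):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        int page_shift = 12;   /* PAGE_SHIFT with 4KB small pages */
        int hpage_shift = 24;  /* HPAGE_SHIFT with 16MB huge pages */

        /* The default computed in <asm/page.h> above. */
        int max_zoneorder = hpage_shift - page_shift + 1;  /* 13 */

        /* Largest buddy block: 2^(MAX_ORDER-1) small pages. */
        long block = 1L << (page_shift + max_zoneorder - 1);
        assert(block == 1L << hpage_shift);  /* exactly one huge page */

        printf("FORCE_MAX_ZONEORDER=%d -> %ld-byte max block\n",
               max_zoneorder, block);
        return 0;
    }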
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index cf52791a5501..e919c0bdc22d 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -41,9 +41,9 @@
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_64BIT
-	set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER);
+	set_pte(pmdp, pmd);
 #else
-	set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER);
+	set_pte(&pmdp->pud.pgd, pmd.pud.pgd);
 #endif
 }
 
@@ -100,6 +100,9 @@ pte_t *get_prealloc_pte(unsigned long pfn);
 /* During init, we can shatter kernel huge pages if needed. */
 void shatter_pmd(pmd_t *pmd);
 
+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
 #ifdef __tilegx__
 /* We share a single page allocator for both L1 and L2 page tables. */
 #if HV_L1_SIZE != HV_L2_SIZE
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index a6604e9485da..1a20b7ef8ea2 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -233,15 +233,23 @@ static inline void __pte_clear(pte_t *ptep)
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
 
+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
 /*
- * set_pte_order() sets the given PTE and also sanity-checks the
+ * set_pte() sets the given PTE and also sanity-checks the
  * requested PTE against the page homecaching.  Unspecified parts
  * of the PTE are filled in when it is written to memory, i.e. all
  * caching attributes if "!forcecache", or the home cpu if "anyhome".
  */
-extern void set_pte_order(pte_t *ptep, pte_t pte, int order);
-
-#define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0)
+extern void set_pte(pte_t *ptep, pte_t pte);
 #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
 
@@ -293,21 +301,6 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
 #define __swp_entry_to_pte(swp)	((pte_t) { (((long long) ((swp).val)) << 32) })
 
 /*
- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
- *
- *  dst - pointer to pgd range anwhere on a pgd page
- *  src - ""
- *  count - the number of pgds to copy.
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
-	memcpy(dst, src, count * sizeof(pgd_t));
-}
-
-/*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 53ec34884744..9f98529761fd 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -24,6 +24,7 @@
 #define PGDIR_SIZE	HV_PAGE_SIZE_LARGE
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))
 
 /*
  * The level-2 index is defined by the difference between the huge
@@ -33,6 +34,7 @@
  * this nomenclature is somewhat confusing.
  */
 #define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
 
 #ifndef __ASSEMBLY__
 
@@ -94,7 +96,6 @@ static inline int pgd_addr_invalid(unsigned long addr)
  */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 
 extern int ptep_test_and_clear_young(struct vm_area_struct *,
 				     unsigned long addr, pte_t *);
@@ -110,6 +111,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	return pte;
 }
 
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
+}
+
 /* Create a pmd from a PTFN. */
 static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
 {
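
A worked example of the new SIZEOF_PGD/SIZEOF_PTE values: with the 4KB/16MB geometry, PTRS_PER_PGD is 1 << (32 - 24) = 256 and PTRS_PER_PTE is 1 << (24 - 12) = 4096, so an L2 page table is 4096 * 8 = 32KB and no longer fits in one small page — which is what drives the multi-page page-table handling in mm/pgtable.c later in this patch. A standalone sketch (illustrative shifts, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        int page_shift = 12, hpage_shift = 24;  /* 4KB / 16MB */
        long pte_size = 8;                      /* sizeof(pte_t): 64-bit HV_PTE */

        long ptrs_per_pgd = 1L << (32 - hpage_shift);          /* 256 */
        long ptrs_per_pte = 1L << (hpage_shift - page_shift);  /* 4096 */

        printf("SIZEOF_PGD = %ld bytes\n", ptrs_per_pgd * pte_size);  /* 2048 */
        printf("SIZEOF_PTE = %ld bytes (%ld small pages)\n",
               ptrs_per_pte * pte_size,
               (ptrs_per_pte * pte_size) >> page_shift);  /* 32768 = 8 pages */
        return 0;
    }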
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index f908473c322d..4d97a2db932e 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -18,13 +18,14 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <asm/backtrace.h>
+#include <asm/page.h>
 #include <hv/hypervisor.h>
 
 /* Everything we need to keep track of a backtrace iteration */
 struct KBacktraceIterator {
 	BacktraceIterator it;
 	struct task_struct *task;     /* task we are backtracing */
-	HV_PTE *pgtable;	      /* page table for user space access */
+	pte_t *pgtable;		      /* page table for user space access */
 	int end;		      /* iteration complete. */
 	int new_context;              /* new context is starting */
 	int profile;                  /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b345d2..9e8e9c4dfa2a 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -68,6 +68,7 @@ struct thread_info {
 #else
 #define THREAD_SIZE_ORDER (0)
 #endif
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)
 
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index eabf1ef02cb2..fffcfa6b3a62 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1556,7 +1556,10 @@ STD_ENTRY(_sys_clone)
 	.align 64
 	/* Align much later jump on the start of a cache line. */
 #if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	nop; nop
+	nop
+#if PAGE_SIZE >= 0x10000
+	nop
+#endif
 #endif
 ENTRY(sys_cmpxchg)
 
@@ -1587,6 +1590,10 @@ ENTRY(sys_cmpxchg)
 	 * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c.
 	 */
 
+#if (PAGE_OFFSET & 0xffff) != 0
+# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
+#endif
+
 #if ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	{
 	 /* Check for unaligned input. */
@@ -1679,11 +1686,14 @@ ENTRY(sys_cmpxchg)
 	 lw	r26, r0
 	}
 	{
-	 /* atomic_locks is page aligned so this suffices to get its addr. */
-	 auli	r21, zero, hi16(atomic_locks)
+	 auli	r21, zero, ha16(atomic_locks)
 
 	 bbns	r23, .Lcmpxchg_badaddr
 	}
+#if PAGE_SIZE < 0x10000
+	/* atomic_locks is page-aligned so for big pages we don't need this. */
+	addli	r21, r21, lo16(atomic_locks)
+#endif
 	{
 	 /*
 	  * Insert the hash bits into the page-aligned pointer.
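
The last hunk above is the usual split-address fix: with 64KB pages a page-aligned symbol has its low 16 bits clear, so a single auli with hi16() forms the full address, but with smaller pages the low half must be added back with addli, and ha16() pre-compensates for the sign-extension of that 16-bit immediate. A user-space model of the three operators (the C helper names are hypothetical; only the arithmetic matters):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t lo16(uint32_t x) { return x & 0xffff; }
    static uint32_t hi16(uint32_t x) { return x >> 16; }
    /* ha16: high half adjusted so (ha16 << 16) + sign-extended lo16 == x. */
    static uint32_t ha16(uint32_t x) { return (x + 0x8000) >> 16; }

    int main(void)
    {
        uint32_t addr = 0xc01f9000;  /* 4KB-aligned, low 16 bits nonzero */

        /* auli r21, zero, ha16(addr); addli r21, r21, lo16(addr) */
        int32_t lo = (int16_t)lo16(addr);        /* addli sign-extends */
        uint32_t r21 = (ha16(addr) << 16) + (uint32_t)lo;
        assert(r21 == addr);

        /* With 64KB pages the low half is zero and auli alone suffices. */
        uint32_t big = 0xc01f0000;
        assert((hi16(big) << 16) == big);
        return 0;
    }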
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index 0d8b9e933487..e00d7179989e 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -240,8 +240,11 @@ static void setup_quasi_va_is_pa(void)
 	pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE);
 	pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
 
-	for (i = 0; i < pgd_index(PAGE_OFFSET); i++)
-		pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte);
+	for (i = 0; i < pgd_index(PAGE_OFFSET); i++) {
+		unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT);
+		if (pfn_valid(pfn))
+			__set_pte(&pgtable[i], pfn_pte(pfn, pte));
+	}
 }
 
 
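
In this quasi "VA == PA" table each PGD entry maps one huge page, so the PFN advances by a huge page's worth of small pages per entry; with 4KB pages there are many more entries below PAGE_OFFSET, hence the new pfn_valid() guard. A sketch of the index arithmetic (illustrative shifts, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        int page_shift = 12, hpage_shift = 24;  /* 4KB / 16MB */

        /* pgd[i] maps physical address i * HPAGE_SIZE. */
        for (int i = 0; i < 4; i++) {
            unsigned long pfn = (unsigned long)i << (hpage_shift - page_shift);
            printf("pgd[%d] -> pfn 0x%lx (PA 0x%lx)\n",
                   i, pfn, pfn << page_shift);
        }
        return 0;
    }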
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index 5ad5e13b0fa6..658752b2835e 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -86,6 +86,21 @@ EXPORT_SYMBOL(dma_free_coherent);
  * can count on nothing having been touched.
  */
 
+/* Flush a PA range from cache page by page. */
+static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
+{
+	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+	size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
+
+	while ((ssize_t)size > 0) {
+		/* Flush the page. */
+		homecache_flush_cache(page++, 0);
+
+		/* Figure out if we need to continue on the next page. */
+		size -= bytesleft;
+		bytesleft = PAGE_SIZE;
+	}
+}
 
 /*
  * dma_map_single can be passed any memory address, and there appear
@@ -97,26 +112,12 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 	       enum dma_data_direction direction)
 {
-	struct page *page;
-	dma_addr_t dma_addr;
-	int thispage;
+	dma_addr_t dma_addr = __pa(ptr);
 
 	BUG_ON(!valid_dma_direction(direction));
 	WARN_ON(size == 0);
 
-	dma_addr = __pa(ptr);
-
-	/* We might have been handed a buffer that wraps a page boundary */
-	while ((int)size > 0) {
-		/* The amount to flush that's on this page */
-		thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1));
-		thispage = min((int)thispage, (int)size);
-		/* Is this valid for any page we could be handed? */
-		page = pfn_to_page(kaddr_to_pfn(ptr));
-		homecache_flush_cache(page, 0);
-		ptr += thispage;
-		size -= thispage;
-	}
+	__dma_map_pa_range(dma_addr, size);
 
 	return dma_addr;
 }
@@ -140,10 +141,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	WARN_ON(nents == 0 || sglist->length == 0);
 
 	for_each_sg(sglist, sg, nents, i) {
-		struct page *page;
 		sg->dma_address = sg_phys(sg);
-		page = pfn_to_page(sg->dma_address >> PAGE_SHIFT);
-		homecache_flush_cache(page, 0);
+		__dma_map_pa_range(sg->dma_address, sg->length);
 	}
 
 	return nents;
@@ -163,6 +162,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
 {
 	BUG_ON(!valid_dma_direction(direction));
 
+	BUG_ON(offset + size > PAGE_SIZE);
 	homecache_flush_cache(page, 0);
 
 	return page_to_pa(page) + offset;
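
The new __dma_map_pa_range() walks a possibly unaligned PA range one page at a time: the first iteration covers only the bytes left in the first page, every later one a full page. A user-space model of that walk, counting flushes instead of calling homecache_flush_cache() (illustrative, not kernel code):

    #include <stdio.h>
    #include <sys/types.h>   /* ssize_t */

    #define PAGE_SIZE 4096UL

    static unsigned long pages_flushed(unsigned long dma_addr, size_t size)
    {
        unsigned long pages = 0;
        size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));

        while ((ssize_t)size > 0) {
            pages++;               /* homecache_flush_cache(page++, 0); */
            size -= bytesleft;     /* may go "negative" on the last page */
            bytesleft = PAGE_SIZE; /* later pages are covered in full */
        }
        return pages;
    }

    int main(void)
    {
        /* 6000 bytes starting at offset 100 touch two 4KB pages. */
        printf("%lu\n", pages_flushed(100, 6000));  /* prints 2 */
        return 0;
    }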
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 5db8b5b63cea..b9cd962e1d30 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -165,7 +165,7 @@ void free_thread_info(struct thread_info *info)
165 kfree(step_state); 165 kfree(step_state);
166 } 166 }
167 167
168 free_page((unsigned long)info); 168 free_pages((unsigned long)info, THREAD_SIZE_ORDER);
169} 169}
170 170
171static void save_arch_state(struct thread_struct *t); 171static void save_arch_state(struct thread_struct *t);
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index f18573643ed1..3696b1832566 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -59,6 +59,8 @@ unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
 unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
 unsigned long __initdata node_free_pfn[MAX_NUMNODES];
 
+static unsigned long __initdata node_percpu[MAX_NUMNODES];
+
 #ifdef CONFIG_HIGHMEM
 /* Page frame index of end of lowmem on each controller. */
 unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
@@ -554,7 +556,6 @@ static void __init setup_bootmem_allocator(void)
 	reserve_bootmem(crashk_res.start,
 			crashk_res.end - crashk_res.start + 1, 0);
 #endif
-
 }
 
 void *__init alloc_remap(int nid, unsigned long size)
@@ -568,11 +569,13 @@ void *__init alloc_remap(int nid, unsigned long size)
 
 static int __init percpu_size(void)
 {
-	int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-#ifdef CONFIG_MODULES
-	if (size < PERCPU_ENOUGH_ROOM)
-		size = PERCPU_ENOUGH_ROOM;
-#endif
+	int size = __per_cpu_end - __per_cpu_start;
+	size += PERCPU_MODULE_RESERVE;
+	size += PERCPU_DYNAMIC_EARLY_SIZE;
+	if (size < PCPU_MIN_UNIT_SIZE)
+		size = PCPU_MIN_UNIT_SIZE;
+	size = roundup(size, PAGE_SIZE);
+
 	/* In several places we assume the per-cpu data fits on a huge page. */
 	BUG_ON(kdata_huge && size > HPAGE_SIZE);
 	return size;
@@ -589,7 +592,6 @@ static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
 static void __init zone_sizes_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
-	unsigned long node_percpu[MAX_NUMNODES] = { 0 };
 	int size = percpu_size();
 	int num_cpus = smp_height * smp_width;
 	int i;
@@ -674,7 +676,7 @@ static void __init zone_sizes_init(void)
 		NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
 
 		free_area_init_node(i, zones_size, start, NULL);
-		printk(KERN_DEBUG "  DMA zone: %ld per-cpu pages\n",
+		printk(KERN_DEBUG "  Normal zone: %ld per-cpu pages\n",
 		       PFN_UP(node_percpu[i]));
 
 		/* Track the type of memory on each node */
@@ -1312,6 +1314,8 @@ static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 	BUG_ON(size % PAGE_SIZE != 0);
 	pfn_offset[nid] += size / PAGE_SIZE;
+	BUG_ON(node_percpu[nid] < size);
+	node_percpu[nid] -= size;
 	if (percpu_pfn[cpu] == 0)
 		percpu_pfn[cpu] = pfn;
 	return pfn_to_kaddr(pfn);
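
The reworked percpu_size() now reserves room for modules and for the early dynamic percpu allocator before rounding up to a page, rather than just page-aligning the static area. A sketch of the computation with stand-in constants (the real values come from <linux/percpu.h>; the numbers here are illustrative):

    #include <stdio.h>

    #define PERCPU_MODULE_RESERVE      (8 << 10)
    #define PERCPU_DYNAMIC_EARLY_SIZE  (12 << 10)
    #define PCPU_MIN_UNIT_SIZE         (32 << 10)
    #define PAGE_SIZE                  4096
    #define ROUNDUP(x, y)              ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
        int size = 9000;  /* stand-in for __per_cpu_end - __per_cpu_start */

        size += PERCPU_MODULE_RESERVE;
        size += PERCPU_DYNAMIC_EARLY_SIZE;
        if (size < PCPU_MIN_UNIT_SIZE)
            size = PCPU_MIN_UNIT_SIZE;
        size = ROUNDUP(size, PAGE_SIZE);

        printf("per-cpu unit size: %d bytes\n", size);  /* 32768 here */
        return 0;
    }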
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
index f7d4a6ad61e8..b2fe15e01075 100644
--- a/arch/tile/lib/memcpy_tile64.c
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source,
 	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
 	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
 	ptep = pte_offset_kernel(pmdp, newsrc);
-	*ptep = src_pte;   /* set_pte() would be confused by this */
+	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
 
 	/* Actually move the data. */
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source,
 	 */
 	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
 	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
-	*ptep = src_pte;   /* set_pte() would be confused by this */
+	__set_pte(ptep, src_pte); /* set_pte() would be confused by this */
 	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
 
 	/*
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index f344f4fc7342..cbe6f4f9eca3 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -412,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
 		pte_t *ptep = virt_to_pte(NULL, kva);
 		pte_t pteval = *ptep;
 		BUG_ON(!pte_present(pteval) || pte_huge(pteval));
-		*ptep = pte_set_home(pteval, home);
+		__set_pte(ptep, pte_set_home(pteval, home));
 	}
 }
 
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index f89ed5dc08d2..d6e87fda2fb2 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -53,18 +53,6 @@
 
 #include "migrate.h"
 
-/*
- * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)"
- * in the Tile Kconfig, but this generates configure warnings.
- * Do it here and force people to get it right to compile this file.
- * The problem is that with 4KB small pages and 16MB huge pages,
- * the default value doesn't allow us to group enough small pages
- * together to make up a huge page.
- */
-#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1
-# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size"
-#endif
-
 #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
 
 #ifndef __tilegx__
@@ -962,11 +950,7 @@ struct kmem_cache *pgd_cache;
 
 void __init pgtable_cache_init(void)
 {
-	pgd_cache = kmem_cache_create("pgd",
-				PTRS_PER_PGD*sizeof(pgd_t),
-				PTRS_PER_PGD*sizeof(pgd_t),
-				0,
-				NULL);
+	pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL);
 	if (!pgd_cache)
 		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index f738765cd1e6..ac01a7cdf77f 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -18,6 +18,7 @@
 #include <linux/linkage.h>
 #include <linux/threads.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm/types.h>
 #include <asm/asm-offsets.h>
 #include <hv/hypervisor.h>
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 2c850d9864e3..1a2b36f8866d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
 }
 #endif
 
+/**
+ * shatter_huge_page() - ensure a given address is mapped by a small page.
+ *
+ * This function converts a huge PTE mapping kernel LOWMEM into a bunch
+ * of small PTEs with the same caching.  No cache flush required, but we
+ * must do a global TLB flush.
+ *
+ * Any caller that wishes to modify a kernel mapping that might
+ * have been made with a huge page should call this function,
+ * since doing so properly avoids race conditions with installing the
+ * newly-shattered page and then flushing all the TLB entries.
+ *
+ * @addr: Address at which to shatter any existing huge page.
+ */
+void shatter_huge_page(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long flags = 0;  /* happy compiler */
+#ifdef __PAGETABLE_PMD_FOLDED
+	struct list_head *pos;
+#endif
+
+	/* Get a pointer to the pmd entry that we need to change. */
+	addr &= HPAGE_MASK;
+	BUG_ON(pgd_addr_invalid(addr));
+	BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
+	pgd = swapper_pg_dir + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	BUG_ON(!pud_present(*pud));
+	pmd = pmd_offset(pud, addr);
+	BUG_ON(!pmd_present(*pmd));
+	if (!pmd_huge_page(*pmd))
+		return;
+
+	/*
+	 * Grab the pgd_lock, since we may need it to walk the pgd_list,
+	 * and since we need some kind of lock here to avoid races.
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
+	if (!pmd_huge_page(*pmd)) {
+		/* Lost the race to convert the huge page. */
+		spin_unlock_irqrestore(&pgd_lock, flags);
+		return;
+	}
+
+	/* Shatter the huge page into the preallocated L2 page table. */
+	pmd_populate_kernel(&init_mm, pmd,
+			    get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+
+#ifdef __PAGETABLE_PMD_FOLDED
+	/* Walk every pgd on the system and update the pmd there. */
+	list_for_each(pos, &pgd_list) {
+		pmd_t *copy_pmd;
+		pgd = list_to_pgd(pos) + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+		copy_pmd = pmd_offset(pud, addr);
+		__set_pmd(copy_pmd, *pmd);
+	}
+#endif
+
+	/* Tell every cpu to notice the change. */
+	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
+		     cpu_possible_mask, NULL, 0);
+
+	/* Hold the lock until the TLB flush is finished to avoid races. */
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /*
  * List of all pgd's needed so it can invalidate entries in both cached
  * and uncached pgd's.  This is essentially codepath-based locking
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd)
 	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
 #endif
 
-	clone_pgd_range(pgd + KERNEL_PGD_INDEX_START,
-			swapper_pg_dir + KERNEL_PGD_INDEX_START,
-			KERNEL_PGD_PTRS);
+	memcpy(pgd + KERNEL_PGD_INDEX_START,
+	       swapper_pg_dir + KERNEL_PGD_INDEX_START,
+	       KERNEL_PGD_PTRS * sizeof(pgd_t));
 
 	pgd_list_add(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP;
+	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
 	struct page *p;
+#if L2_USER_PGTABLE_ORDER > 0
+	int i;
+#endif
 
 #ifdef CONFIG_HIGHPTE
 	flags |= __GFP_HIGHMEM;
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	if (p == NULL)
 		return NULL;
 
+#if L2_USER_PGTABLE_ORDER > 0
+	/*
+	 * Make every page have a page_count() of one, not just the first.
+	 * We don't use __GFP_COMP since it doesn't look like it works
+	 * correctly with tlb_remove_page().
+	 */
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+		init_page_count(p+i);
+		inc_zone_page_state(p+i, NR_PAGETABLE);
+	}
+#endif
+
 	pgtable_page_ctor(p);
 	return p;
 }
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  */
 void pte_free(struct mm_struct *mm, struct page *p)
 {
+	int i;
+
 	pgtable_page_dtor(p);
-	__free_pages(p, L2_USER_PGTABLE_ORDER);
+	__free_page(p);
+
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+		__free_page(p+i);
+		dec_zone_page_state(p+i, NR_PAGETABLE);
+	}
 }
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -252,8 +344,12 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
 	int i;
 
 	pgtable_page_dtor(pte);
-	for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i)
+	tlb_remove_page(tlb, pte);
+
+	for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
 		tlb_remove_page(tlb, pte + i);
+		dec_zone_page_state(pte + i, NR_PAGETABLE);
+	}
 }
 
 #ifndef __tilegx__
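
The pte_alloc_one()/pte_free()/__pte_free_tlb() changes above all follow from one fact: the L2 table now spans L2_USER_PGTABLE_PAGES separate pages, and tlb_remove_page() drops a reference on each page individually, which is why the patch gives every page its own count of one instead of relying on __GFP_COMP. A toy model of that lifecycle (not kernel code):

    #include <stdio.h>

    struct toy_page { int count; };

    static void toy_put_page(struct toy_page *p, int i)
    {
        if (--p->count == 0)
            printf("page %d freed\n", i);
    }

    int main(void)
    {
        enum { L2_USER_PGTABLE_PAGES = 8 };  /* 32KB table / 4KB pages */
        struct toy_page table[L2_USER_PGTABLE_PAGES];

        /* pte_alloc_one(): the head page arrives with count 1; the loop
         * in the patch init_page_count()s the remaining seven. */
        for (int i = 0; i < L2_USER_PGTABLE_PAGES; i++)
            table[i].count = 1;

        /* __pte_free_tlb(): each page gets its own put via
         * tlb_remove_page(), so each one is freed exactly once. */
        for (int i = 0; i < L2_USER_PGTABLE_PAGES; i++)
            toy_put_page(&table[i], i);
        return 0;
    }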
@@ -335,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot)
 	return x + y * smp_width;
 }
 
-void set_pte_order(pte_t *ptep, pte_t pte, int order)
+/*
+ * Convert a kernel VA to a PA and homing information.
+ */
+int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
 {
-	unsigned long pfn = pte_pfn(pte);
-	struct page *page = pfn_to_page(pfn);
+	struct page *page = virt_to_page(va);
+	pte_t null_pte = { 0 };
 
-	/* Update the home of a PTE if necessary */
-	pte = pte_set_home(pte, page_home(page));
+	*cpa = __pa(va);
+
+	/* Note that this is not writing a page table, just returning a pte. */
+	*pte = pte_set_home(null_pte, page_home(page));
+
+	return 0; /* return non-zero if not hfh? */
+}
+EXPORT_SYMBOL(va_to_cpa_and_pte);
 
+void __set_pte(pte_t *ptep, pte_t pte)
+{
 #ifdef __tilegx__
 	*ptep = pte;
 #else
-	/*
-	 * When setting a PTE, write the high bits first, then write
-	 * the low bits.  This sets the "present" bit only after the
-	 * other bits are in place.  If a particular PTE update
-	 * involves transitioning from one valid PTE to another, it
-	 * may be necessary to call set_pte_order() more than once,
-	 * transitioning via a suitable intermediate state.
-	 * Note that this sequence also means that if we are transitioning
-	 * from any migrating PTE to a non-migrating one, we will not
-	 * see a half-updated PTE with the migrating bit off.
-	 */
-#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
-# error Must write the present and migrating bits last
-#endif
-	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
-	barrier();
-	((u32 *)ptep)[0] = (u32)(pte_val(pte));
-#endif
+# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
+# error Must write the present and migrating bits last
+# endif
+	if (pte_present(pte)) {
+		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+		barrier();
+		((u32 *)ptep)[0] = (u32)(pte_val(pte));
+	} else {
+		((u32 *)ptep)[0] = (u32)(pte_val(pte));
+		barrier();
+		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+	}
+#endif /* __tilegx__ */
+}
+
+void set_pte(pte_t *ptep, pte_t pte)
+{
+	struct page *page = pfn_to_page(pte_pfn(pte));
+
+	/* Update the home of a PTE if necessary */
+	pte = pte_set_home(pte, page_home(page));
+
+	__set_pte(ptep, pte);
 }
 
 /* Can this mm load a PTE with cached_priority set? */
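
The ordering logic in the new __set_pte() is the heart of this file: on 32-bit tile a PTE is two 32-bit words, and the word holding the present and migrating bits (the low word, per the #error guard) must be written last when installing a valid PTE and first when clearing one, so a concurrent walker never sees a half-written entry that claims to be valid. A user-space model of that discipline (bit position illustrative; __sync_synchronize() stands in for the kernel's barrier()):

    #include <assert.h>
    #include <stdint.h>

    #define PRESENT 0x1u  /* illustrative: valid bit lives in the low word */

    static void model_set_pte(volatile uint32_t w[2], uint64_t pte)
    {
        if ((uint32_t)pte & PRESENT) {
            w[1] = (uint32_t)(pte >> 32);  /* harmless high half first */
            __sync_synchronize();
            w[0] = (uint32_t)pte;          /* present bit appears last */
        } else {
            w[0] = (uint32_t)pte;          /* present bit vanishes first */
            __sync_synchronize();
            w[1] = (uint32_t)(pte >> 32);
        }
    }

    int main(void)
    {
        volatile uint32_t pte[2] = { 0, 0 };
        model_set_pte(pte, ((uint64_t)0x1234 << 32) | PRESENT);
        assert(pte[0] == PRESENT && pte[1] == 0x1234);
        return 0;
    }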