author	Martin Schwidefsky <schwidefsky@de.ibm.com>	2010-10-25 10:10:11 -0400
committer	Martin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com>	2010-10-25 10:10:15 -0400
commit	80217147a3d80c8a4e48f06e2f6e965455f3fe2a (patch)
tree	b419ae9ee3ab0e5b92c0ed2a30ff59b76d6a4978 /arch
parent	87799ebab760dd1460f6e4193d4f71ba416d1451 (diff)
[S390] lockless get_user_pages_fast()
Implement get_user_pages_fast without locking in the fastpath on s390.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
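For context, here is a minimal caller-side sketch of the interface this patch implements. It is not part of the commit; the helper name pin_user_buffer and its error handling are illustrative assumptions. It follows the return-value rules documented in the new arch/s390/mm/gup.c: the call may pin fewer pages than requested, and a negative errno is only returned when nothing could be pinned.

/* Hypothetical caller, not part of this patch. */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
			   struct page **pages)
{
	int pinned, i;

	/* Try the lockless fastpath; it falls back to get_user_pages(). */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (pinned < 0)
		return pinned;		/* no page could be pinned */

	/* ... use pages[0..pinned-1] here ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);	/* drop the references again */
	return pinned;
}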
Diffstat (limited to 'arch')
-rw-r--r--	arch/s390/Kconfig			|   1
-rw-r--r--	arch/s390/include/asm/pgalloc.h		|   4
-rw-r--r--	arch/s390/include/asm/pgtable.h		|   1
-rw-r--r--	arch/s390/include/asm/tlb.h		|  13
-rw-r--r--	arch/s390/mm/Makefile			|   2
-rw-r--r--	arch/s390/mm/gup.c			| 225
-rw-r--r--	arch/s390/mm/hugetlbpage.c		|   2
-rw-r--r--	arch/s390/mm/init.c			|   2
-rw-r--r--	arch/s390/mm/pgtable.c			| 171
9 files changed, 394 insertions, 27 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 75976a141947..7afc17340500 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -101,6 +101,7 @@ config S390
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
+	select HAVE_GET_USER_PAGES_FAST
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 68940d0bad91..082eb4e50e8b 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -21,9 +21,11 @@
 
 unsigned long *crst_table_alloc(struct mm_struct *, int);
 void crst_table_free(struct mm_struct *, unsigned long *);
+void crst_table_free_rcu(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
+void page_table_free_rcu(struct mm_struct *, unsigned long *);
 void disable_noexec(struct mm_struct *, struct task_struct *);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
@@ -176,4 +178,6 @@ static inline void pmd_populate(struct mm_struct *mm,
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
 
+extern void rcu_table_freelist_finish(void);
+
 #endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 22a294571000..785229ae39cb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -316,6 +316,7 @@ extern unsigned long VMALLOC_START;
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin */
+#define _SEGMENT_ENTRY_RO	0x200		/* page protection bit */
 #define _SEGMENT_ENTRY_INV	0x20		/* invalid segment table entry */
 #define _SEGMENT_ENTRY_COMMON	0x10		/* common segment bit */
 #define _SEGMENT_ENTRY_PTL	0x0f		/* page table length */
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fd1c00d08bf5..f1f644f2240a 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -64,10 +64,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb,
 	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS))
 		__tlb_flush_mm(tlb->mm);
 	while (tlb->nr_ptes > 0)
-		pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]);
+		page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]);
 	while (tlb->nr_pxds < TLB_NR_PTRS)
-		/* pgd_free frees the pointer as region or segment table */
-		pgd_free(tlb->mm, tlb->array[tlb->nr_pxds++]);
+		crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]);
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
@@ -75,6 +74,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 {
 	tlb_flush_mmu(tlb, start, end);
 
+	rcu_table_freelist_finish();
+
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
@@ -103,7 +104,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 		if (tlb->nr_ptes >= tlb->nr_pxds)
 			tlb_flush_mmu(tlb, 0, 0);
 	} else
-		pte_free(tlb->mm, pte);
+		page_table_free(tlb->mm, (unsigned long *) pte);
 }
 
 /*
@@ -124,7 +125,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 		if (tlb->nr_ptes >= tlb->nr_pxds)
 			tlb_flush_mmu(tlb, 0, 0);
 	} else
-		pmd_free(tlb->mm, pmd);
+		crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
 }
 
@@ -146,7 +147,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 		if (tlb->nr_ptes >= tlb->nr_pxds)
 			tlb_flush_mmu(tlb, 0, 0);
 	} else
-		pud_free(tlb->mm, pud);
+		crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
 }
 
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index eec054484419..6fbc6f3fbdf2 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,6 +3,6 @@
 #
 
 obj-y	:= init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
-	   page-states.o
+	   page-states.o gup.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 000000000000..38e641cdd977
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,225 @@
+/*
+ *  Lockless get_user_pages_fast for s390
+ *
+ *  Copyright IBM Corp. 2010
+ *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask, result;
+	pte_t *ptep, pte;
+	struct page *page;
+
+	result = write ? 0 : _PAGE_RO;
+	mask = result | _PAGE_INVALID | _PAGE_SPECIAL;
+
+	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
+	do {
+		pte = *ptep;
+		barrier();
+		if ((pte_val(pte) & mask) != result)
+			return 0;
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
+		if (!page_cache_get_speculative(page))
+			return 0;
+		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			put_page(page);
+			return 0;
+		}
+		pages[*nr] = page;
+		(*nr)++;
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	return 1;
+}
+
+static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask, result;
+	struct page *head, *page;
+	int refs;
+
+	result = write ? 0 : _SEGMENT_ENTRY_RO;
+	mask = result | _SEGMENT_ENTRY_INV;
+	if ((pmd_val(pmd) & mask) != result)
+		return 0;
+	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
+
+	refs = 0;
+	head = pmd_page(pmd);
+	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+	}
+
+	return 1;
+}
+
+
+static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp, pmd;
+
+	pmdp = (pmd_t *) pudp;
+#ifdef CONFIG_64BIT
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pmdp = (pmd_t *) pud_deref(pud);
+	pmdp += pmd_index(addr);
+#endif
+	do {
+		pmd = *pmdp;
+		barrier();
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		if (unlikely(pmd_huge(pmd))) {
+			if (!gup_huge_pmd(pmdp, pmd, addr, next,
+					  write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmdp, pmd, addr, next,
+					  write, pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp, pud;
+
+	pudp = (pud_t *) pgdp;
+#ifdef CONFIG_64BIT
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		pudp = (pud_t *) pgd_deref(pgd);
+	pudp += pud_index(addr);
+#endif
+	do {
+		pud = *pudp;
+		barrier();
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	pgd_t *pgdp, pgd;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (end < start)
+		goto slow_irqon;
+
+	/*
+	 * local_irq_disable() doesn't prevent pagetable teardown, but does
+	 * prevent the pagetables from being freed on s390.
+	 *
+	 * So long as we atomically load page table pointers versus teardown,
+	 * we can follow the address down to the page and take a ref on it.
+	 */
+	local_irq_disable();
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd = *pgdp;
+		barrier();
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_enable();
+
+	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+	return nr;
+
+	{
+		int ret;
+slow:
+		local_irq_enable();
+slow_irqon:
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, start,
+			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+		up_read(&mm->mmap_sem);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+
+		return ret;
+	}
+}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f28c43d2f61d..639cd21f2218 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -68,7 +68,7 @@ void arch_release_hugepage(struct page *page)
 	ptep = (pte_t *) page[1].index;
 	if (!ptep)
 		return;
-	pte_free(&init_mm, ptep);
+	page_table_free(&init_mm, (unsigned long *) ptep);
 	page[1].index = 0;
 }
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 0744fb3536b1..852a3fec1ece 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -38,8 +38,6 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
 
 unsigned long empty_zero_page, zero_page_mask;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8d999249d357..19338d228c9b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/quicklist.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -23,6 +24,67 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
+struct rcu_table_freelist {
+	struct rcu_head rcu;
+	struct mm_struct *mm;
+	unsigned int pgt_index;
+	unsigned int crst_index;
+	unsigned long *table[0];
+};
+
+#define RCU_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
+	  / sizeof(unsigned long))
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
+
+static void __page_table_free(struct mm_struct *mm, unsigned long *table);
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table);
+
+static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
+{
+	struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
+	struct rcu_table_freelist *batch = *batchp;
+
+	if (batch)
+		return batch;
+	batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
+	if (batch) {
+		batch->mm = mm;
+		batch->pgt_index = 0;
+		batch->crst_index = RCU_FREELIST_SIZE;
+		*batchp = batch;
+	}
+	return batch;
+}
+
+static void rcu_table_freelist_callback(struct rcu_head *head)
+{
+	struct rcu_table_freelist *batch =
+		container_of(head, struct rcu_table_freelist, rcu);
+
+	while (batch->pgt_index > 0)
+		__page_table_free(batch->mm, batch->table[--batch->pgt_index]);
+	while (batch->crst_index < RCU_FREELIST_SIZE)
+		__crst_table_free(batch->mm, batch->table[batch->crst_index++]);
+	free_page((unsigned long) batch);
+}
+
+void rcu_table_freelist_finish(void)
+{
+	struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist);
+
+	if (!batch)
+		return;
+	call_rcu(&batch->rcu, rcu_table_freelist_callback);
+	__get_cpu_var(rcu_table_freelist) = NULL;
+}
+
+static void smp_sync(void *arg)
+{
+}
+
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
 #define TABLES_PER_PAGE	4
@@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
 		}
 		page->index = page_to_phys(shadow);
 	}
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	list_add(&page->lru, &mm->context.crst_list);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	return (unsigned long *) page_to_phys(page);
 }
 
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static void __crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	unsigned long *shadow = get_shadow_table(table);
-	struct page *page = virt_to_page(table);
 
-	spin_lock(&mm->context.list_lock);
-	list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
 	if (shadow)
 		free_pages((unsigned long) shadow, ALLOC_ORDER);
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page = virt_to_page(table);
+
+	spin_lock_bh(&mm->context.list_lock);
+	list_del(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	__crst_table_free(mm, table);
+}
+
+void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+	struct rcu_table_freelist *batch;
+	struct page *page = virt_to_page(table);
+
+	spin_lock_bh(&mm->context.list_lock);
+	list_del(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	if (atomic_read(&mm->mm_users) < 2 &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		__crst_table_free(mm, table);
+		return;
+	}
+	batch = rcu_table_freelist_get(mm);
+	if (!batch) {
+		smp_call_function(smp_sync, NULL, 1);
+		__crst_table_free(mm, table);
+		return;
+	}
+	batch->table[--batch->crst_index] = table;
+	if (batch->pgt_index >= batch->crst_index)
+		rcu_table_freelist_finish();
+}
+
 #ifdef CONFIG_64BIT
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
@@ -108,7 +200,7 @@ repeat:
 	table = crst_table_alloc(mm, mm->context.noexec);
 	if (!table)
 		return -ENOMEM;
-	spin_lock(&mm->page_table_lock);
+	spin_lock_bh(&mm->page_table_lock);
 	if (mm->context.asce_limit < limit) {
 		pgd = (unsigned long *) mm->pgd;
 		if (mm->context.asce_limit <= (1UL << 31)) {
@@ -130,7 +222,7 @@ repeat:
 		mm->task_size = mm->context.asce_limit;
 		table = NULL;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_bh(&mm->page_table_lock);
 	if (table)
 		crst_table_free(mm, table);
 	if (mm->context.asce_limit < limit)
@@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long bits;
 
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			page = NULL;
 	}
 	if (!page) {
-		spin_unlock(&mm->context.list_lock);
+		spin_unlock_bh(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
@@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			clear_table_pgstes(table);
 		else
 			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-		spin_lock(&mm->context.list_lock);
+		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
 	table = (unsigned long *) page_to_phys(page);
@@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	page->flags |= bits;
 	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
 		list_move_tail(&page->lru, &mm->context.pgtable_list);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
+static void __page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page;
+	unsigned long bits;
+
+	bits = ((unsigned long) table) & 15;
+	table = (unsigned long *)(((unsigned long) table) ^ bits);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	page->flags ^= bits;
+	if (!(page->flags & FRAG_MASK)) {
+		pgtable_page_dtor(page);
+		__free_page(page);
+	}
+}
+
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	struct page *page;
@@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	page->flags ^= bits;
 	if (page->flags & FRAG_MASK) {
 		/* Page now has some free pgtable fragments. */
@@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	} else
 		/* All fragments of the 4K page have been freed. */
 		list_del(&page->lru);
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	if (page) {
 		pgtable_page_dtor(page);
 		__free_page(page);
 	}
 }
 
+void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+{
+	struct rcu_table_freelist *batch;
+	struct page *page;
+	unsigned long bits;
+
+	if (atomic_read(&mm->mm_users) < 2 &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		page_table_free(mm, table);
+		return;
+	}
+	batch = rcu_table_freelist_get(mm);
+	if (!batch) {
+		smp_call_function(smp_sync, NULL, 1);
+		page_table_free(mm, table);
+		return;
+	}
+	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	spin_lock_bh(&mm->context.list_lock);
+	/* Delayed freeing with rcu prevents reuse of pgtable fragments */
+	list_del_init(&page->lru);
+	spin_unlock_bh(&mm->context.list_lock);
+	table = (unsigned long *)(((unsigned long) table) | bits);
+	batch->table[batch->pgt_index++] = table;
+	if (batch->pgt_index >= batch->crst_index)
+		rcu_table_freelist_finish();
+}
+
 void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 {
 	struct page *page;
 
-	spin_lock(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.list_lock);
 	/* Free shadow region and segment tables. */
 	list_for_each_entry(page, &mm->context.crst_list, lru)
 		if (page->index) {
@@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	/* "Free" second halves of page tables. */
 	list_for_each_entry(page, &mm->context.pgtable_list, lru)
 		page->flags &= ~SECOND_HALVES;
-	spin_unlock(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.list_lock);
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
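The rcu_table_freelist added above is an instance of the standard call_rcu deferred-free idiom: table pointers are batched per CPU and only released from an RCU callback, so a table cannot disappear underneath a CPU that is still walking it in the interrupt-disabled gup fastpath. When no batch page can be allocated, the smp_call_function(smp_sync, NULL, 1) fallback waits until every other CPU has serviced the IPI, which cannot happen while that CPU sits in the fastpath with interrupts off, so the table can then be freed immediately. For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the underlying idiom; the names my_batch, my_batch_free and my_free_deferred are hypothetical and not part of this patch, and the sketch assumes process context.

/*
 * Hypothetical illustration of the call_rcu deferred-free idiom
 * (not part of the patch).
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_batch {
	struct rcu_head rcu;	/* linkage for call_rcu() */
	void *obj;		/* object to free after a grace period */
};

static void my_batch_free(struct rcu_head *head)
{
	/* Runs only after every pre-existing RCU reader has finished. */
	struct my_batch *batch = container_of(head, struct my_batch, rcu);

	kfree(batch->obj);
	kfree(batch);
}

static void my_free_deferred(void *obj)
{
	struct my_batch *batch = kmalloc(sizeof(*batch), GFP_KERNEL);

	if (!batch) {
		/* No memory to defer: wait for readers directly, then free. */
		synchronize_rcu();
		kfree(obj);
		return;
	}
	batch->obj = obj;
	call_rcu(&batch->rcu, my_batch_free);
}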