about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/Makefile 4
-rw-r--r-- mm/bounce.c 302
-rw-r--r-- mm/filemap.c 40
-rw-r--r-- mm/highmem.c 281
-rw-r--r-- mm/memory.c 9
-rw-r--r-- mm/memory_hotplug.c 5
-rw-r--r-- mm/migrate.c 4
-rw-r--r-- mm/oom_kill.c 53
-rw-r--r-- mm/page-writeback.c 160
-rw-r--r-- mm/shmem.c 99
-rw-r--r-- mm/shmem_acl.c 197
-rw-r--r-- mm/slab.c 24
-rw-r--r-- mm/swapfile.c 7
-rw-r--r-- mm/truncate.c 26
14 files changed, 883 insertions, 328 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 60c56c0b5e..12b3a4eee8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,11 +12,15 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) 13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
14 14
15ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
16obj-y += bounce.o
17endif
15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 18obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
16obj-$(CONFIG_HUGETLBFS) += hugetlb.o 19obj-$(CONFIG_HUGETLBFS) += hugetlb.o
17obj-$(CONFIG_NUMA) += mempolicy.o 20obj-$(CONFIG_NUMA) += mempolicy.o
18obj-$(CONFIG_SPARSEMEM) += sparse.o 21obj-$(CONFIG_SPARSEMEM) += sparse.o
19obj-$(CONFIG_SHMEM) += shmem.o 22obj-$(CONFIG_SHMEM) += shmem.o
23obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
20obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o 24obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
21obj-$(CONFIG_SLOB) += slob.o 25obj-$(CONFIG_SLOB) += slob.o
22obj-$(CONFIG_SLAB) += slab.o 26obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/bounce.c b/mm/bounce.c
new file mode 100644
index 0000000000..e4b62d2a40
--- /dev/null
+++ b/mm/bounce.c
@@ -0,0 +1,302 @@
1/* bounce buffer handling for block devices
2 *
3 * - Split from highmem.c
4 */
5
6#include <linux/mm.h>
7#include <linux/module.h>
8#include <linux/swap.h>
9#include <linux/bio.h>
10#include <linux/pagemap.h>
11#include <linux/mempool.h>
12#include <linux/blkdev.h>
13#include <linux/init.h>
14#include <linux/hash.h>
15#include <linux/highmem.h>
16#include <linux/blktrace_api.h>
17#include <asm/tlbflush.h>
18
19#define POOL_SIZE 64
20#define ISA_POOL_SIZE 16
21
22static mempool_t *page_pool, *isa_page_pool;
23
24#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void)
26{
27 struct sysinfo i;
28 si_meminfo(&i);
29 si_swapinfo(&i);
30
31 if (!i.totalhigh)
32 return 0;
33
34 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
35 BUG_ON(!page_pool);
36 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
37
38 return 0;
39}
40
41__initcall(init_emergency_pool);
42
43/*
44 * highmem version, map in to vec
45 */
46static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
47{
48 unsigned long flags;
49 unsigned char *vto;
50
51 local_irq_save(flags);
52 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
53 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
54 kunmap_atomic(vto, KM_BOUNCE_READ);
55 local_irq_restore(flags);
56}
57
58#else /* CONFIG_HIGHMEM */
59
60#define bounce_copy_vec(to, vfrom) \
61 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
62
63#endif /* CONFIG_HIGHMEM */
64
65/*
66 * allocate pages in the DMA region for the ISA pool
67 */
68static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
69{
70 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
71}
72
73/*
74 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
75 * as the max address, so check if the pool has already been created.
76 */
77int init_emergency_isa_pool(void)
78{
79 if (isa_page_pool)
80 return 0;
81
82 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
83 mempool_free_pages, (void *) 0);
84 BUG_ON(!isa_page_pool);
85
86 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
87 return 0;
88}
89
90/*
91 * Simple bounce buffer support for highmem pages. Depending on the
92 * queue gfp mask set, *to may or may not be a highmem page. kmap it
93 * always, it will do the Right Thing
94 */
95static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
96{
97 unsigned char *vfrom;
98 struct bio_vec *tovec, *fromvec;
99 int i;
100
101 __bio_for_each_segment(tovec, to, i, 0) {
102 fromvec = from->bi_io_vec + i;
103
104 /*
105 * not bounced
106 */
107 if (tovec->bv_page == fromvec->bv_page)
108 continue;
109
110 /*
111 * fromvec->bv_offset and fromvec->bv_len might have been
112 * modified by the block layer, so use the original copy,
113 * bounce_copy_vec already uses tovec->bv_len
114 */
115 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
116
117 flush_dcache_page(tovec->bv_page);
118 bounce_copy_vec(tovec, vfrom);
119 }
120}
121
122static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
123{
124 struct bio *bio_orig = bio->bi_private;
125 struct bio_vec *bvec, *org_vec;
126 int i;
127
128 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
129 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
130
131 /*
132 * free up bounce indirect pages used
133 */
134 __bio_for_each_segment(bvec, bio, i, 0) {
135 org_vec = bio_orig->bi_io_vec + i;
136 if (bvec->bv_page == org_vec->bv_page)
137 continue;
138
139 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
140 mempool_free(bvec->bv_page, pool);
141 }
142
143 bio_endio(bio_orig, bio_orig->bi_size, err);
144 bio_put(bio);
145}
146
147static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
148{
149 if (bio->bi_size)
150 return 1;
151
152 bounce_end_io(bio, page_pool, err);
153 return 0;
154}
155
156static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
157{
158 if (bio->bi_size)
159 return 1;
160
161 bounce_end_io(bio, isa_page_pool, err);
162 return 0;
163}
164
165static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
166{
167 struct bio *bio_orig = bio->bi_private;
168
169 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
170 copy_to_high_bio_irq(bio_orig, bio);
171
172 bounce_end_io(bio, pool, err);
173}
174
175static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
176{
177 if (bio->bi_size)
178 return 1;
179
180 __bounce_end_io_read(bio, page_pool, err);
181 return 0;
182}
183
184static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
185{
186 if (bio->bi_size)
187 return 1;
188
189 __bounce_end_io_read(bio, isa_page_pool, err);
190 return 0;
191}
192
193static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
194 mempool_t *pool)
195{
196 struct page *page;
197 struct bio *bio = NULL;
198 int i, rw = bio_data_dir(*bio_orig);
199 struct bio_vec *to, *from;
200
201 bio_for_each_segment(from, *bio_orig, i) {
202 page = from->bv_page;
203
204 /*
205 * is destination page below bounce pfn?
206 */
207 if (page_to_pfn(page) < q->bounce_pfn)
208 continue;
209
210 /*
211 * irk, bounce it
212 */
213 if (!bio)
214 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
215
216 to = bio->bi_io_vec + i;
217
218 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
219 to->bv_len = from->bv_len;
220 to->bv_offset = from->bv_offset;
221 inc_zone_page_state(to->bv_page, NR_BOUNCE);
222
223 if (rw == WRITE) {
224 char *vto, *vfrom;
225
226 flush_dcache_page(from->bv_page);
227 vto = page_address(to->bv_page) + to->bv_offset;
228 vfrom = kmap(from->bv_page) + from->bv_offset;
229 memcpy(vto, vfrom, to->bv_len);
230 kunmap(from->bv_page);
231 }
232 }
233
234 /*
235 * no pages bounced
236 */
237 if (!bio)
238 return;
239
240 /*
241 * at least one page was bounced, fill in possible non-highmem
242 * pages
243 */
244 __bio_for_each_segment(from, *bio_orig, i, 0) {
245 to = bio_iovec_idx(bio, i);
246 if (!to->bv_page) {
247 to->bv_page = from->bv_page;
248 to->bv_len = from->bv_len;
249 to->bv_offset = from->bv_offset;
250 }
251 }
252
253 bio->bi_bdev = (*bio_orig)->bi_bdev;
254 bio->bi_flags |= (1 << BIO_BOUNCED);
255 bio->bi_sector = (*bio_orig)->bi_sector;
256 bio->bi_rw = (*bio_orig)->bi_rw;
257
258 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
259 bio->bi_idx = (*bio_orig)->bi_idx;
260 bio->bi_size = (*bio_orig)->bi_size;
261
262 if (pool == page_pool) {
263 bio->bi_end_io = bounce_end_io_write;
264 if (rw == READ)
265 bio->bi_end_io = bounce_end_io_read;
266 } else {
267 bio->bi_end_io = bounce_end_io_write_isa;
268 if (rw == READ)
269 bio->bi_end_io = bounce_end_io_read_isa;
270 }
271
272 bio->bi_private = *bio_orig;
273 *bio_orig = bio;
274}
275
276void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
277{
278 mempool_t *pool;
279
280 /*
281 * for non-isa bounce case, just check if the bounce pfn is equal
282 * to or bigger than the highest pfn in the system -- in that case,
283 * don't waste time iterating over bio segments
284 */
285 if (!(q->bounce_gfp & GFP_DMA)) {
286 if (q->bounce_pfn >= blk_max_pfn)
287 return;
288 pool = page_pool;
289 } else {
290 BUG_ON(!isa_page_pool);
291 pool = isa_page_pool;
292 }
293
294 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
295
296 /*
297 * slow path
298 */
299 __blk_queue_bounce(q, bio_orig, pool);
300}
301
302EXPORT_SYMBOL(blk_queue_bounce);
diff --git a/mm/filemap.c b/mm/filemap.c
index afcdc72b5e..c4fe97f5ac 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1471,7 +1471,7 @@ outside_data_content:
1471 * accessible.. 1471 * accessible..
1472 */ 1472 */
1473 if (area->vm_mm == current->mm) 1473 if (area->vm_mm == current->mm)
1474 return NULL; 1474 return NOPAGE_SIGBUS;
1475 /* Fall through to the non-read-ahead case */ 1475 /* Fall through to the non-read-ahead case */
1476no_cached_page: 1476no_cached_page:
1477 /* 1477 /*
@@ -1496,7 +1496,7 @@ no_cached_page:
1496 */ 1496 */
1497 if (error == -ENOMEM) 1497 if (error == -ENOMEM)
1498 return NOPAGE_OOM; 1498 return NOPAGE_OOM;
1499 return NULL; 1499 return NOPAGE_SIGBUS;
1500 1500
1501page_not_uptodate: 1501page_not_uptodate:
1502 if (!did_readaround) { 1502 if (!did_readaround) {
@@ -1565,7 +1565,7 @@ page_not_uptodate:
1565 */ 1565 */
1566 shrink_readahead_size_eio(file, ra); 1566 shrink_readahead_size_eio(file, ra);
1567 page_cache_release(page); 1567 page_cache_release(page);
1568 return NULL; 1568 return NOPAGE_SIGBUS;
1569} 1569}
1570EXPORT_SYMBOL(filemap_nopage); 1570EXPORT_SYMBOL(filemap_nopage);
1571 1571
@@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2020 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) 2020 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
2021 *count = inode->i_sb->s_maxbytes - *pos; 2021 *count = inode->i_sb->s_maxbytes - *pos;
2022 } else { 2022 } else {
2023#ifdef CONFIG_BLOCK
2023 loff_t isize; 2024 loff_t isize;
2024 if (bdev_read_only(I_BDEV(inode))) 2025 if (bdev_read_only(I_BDEV(inode)))
2025 return -EPERM; 2026 return -EPERM;
@@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2031 2032
2032 if (*pos + *count > isize) 2033 if (*pos + *count > isize)
2033 *count = isize - *pos; 2034 *count = isize - *pos;
2035#else
2036 return -EPERM;
2037#endif
2034 } 2038 }
2035 return 0; 2039 return 0;
2036} 2040}
@@ -2491,3 +2495,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2491 } 2495 }
2492 return retval; 2496 return retval;
2493} 2497}
2498
2499/**
2500 * try_to_release_page() - release old fs-specific metadata on a page
2501 *
2502 * @page: the page which the kernel is trying to free
2503 * @gfp_mask: memory allocation flags (and I/O mode)
2504 *
2505 * The address_space is to try to release any data against the page
2506 * (presumably at page->private). If the release was successful, return `1'.
2507 * Otherwise return zero.
2508 *
2509 * The @gfp_mask argument specifies whether I/O may be performed to release
2510 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
2511 *
2512 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
2513 */
2514int try_to_release_page(struct page *page, gfp_t gfp_mask)
2515{
2516 struct address_space * const mapping = page->mapping;
2517
2518 BUG_ON(!PageLocked(page));
2519 if (PageWriteback(page))
2520 return 0;
2521
2522 if (mapping && mapping->a_ops->releasepage)
2523 return mapping->a_ops->releasepage(page, gfp_mask);
2524 return try_to_free_buffers(page);
2525}
2526
2527EXPORT_SYMBOL(try_to_release_page);
diff --git a/mm/highmem.c b/mm/highmem.c
index ee5519b176..0206e7e501 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,13 +29,6 @@
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31 31
32static mempool_t *page_pool, *isa_page_pool;
33
34static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
35{
36 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
37}
38
39/* 32/*
40 * Virtual_count is not a pure "count". 33 * Virtual_count is not a pure "count".
41 * 0 means that it is not mapped, and has not been mapped 34 * 0 means that it is not mapped, and has not been mapped
@@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page)
217} 210}
218 211
219EXPORT_SYMBOL(kunmap_high); 212EXPORT_SYMBOL(kunmap_high);
220
221#define POOL_SIZE 64
222
223static __init int init_emergency_pool(void)
224{
225 struct sysinfo i;
226 si_meminfo(&i);
227 si_swapinfo(&i);
228
229 if (!i.totalhigh)
230 return 0;
231
232 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
233 BUG_ON(!page_pool);
234 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
235
236 return 0;
237}
238
239__initcall(init_emergency_pool);
240
241/*
242 * highmem version, map in to vec
243 */
244static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
245{
246 unsigned long flags;
247 unsigned char *vto;
248
249 local_irq_save(flags);
250 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
251 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
252 kunmap_atomic(vto, KM_BOUNCE_READ);
253 local_irq_restore(flags);
254}
255
256#else /* CONFIG_HIGHMEM */
257
258#define bounce_copy_vec(to, vfrom) \
259 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
260
261#endif 213#endif
262 214
263#define ISA_POOL_SIZE 16
264
265/*
266 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
267 * as the max address, so check if the pool has already been created.
268 */
269int init_emergency_isa_pool(void)
270{
271 if (isa_page_pool)
272 return 0;
273
274 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
275 mempool_free_pages, (void *) 0);
276 BUG_ON(!isa_page_pool);
277
278 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
279 return 0;
280}
281
282/*
283 * Simple bounce buffer support for highmem pages. Depending on the
284 * queue gfp mask set, *to may or may not be a highmem page. kmap it
285 * always, it will do the Right Thing
286 */
287static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
288{
289 unsigned char *vfrom;
290 struct bio_vec *tovec, *fromvec;
291 int i;
292
293 __bio_for_each_segment(tovec, to, i, 0) {
294 fromvec = from->bi_io_vec + i;
295
296 /*
297 * not bounced
298 */
299 if (tovec->bv_page == fromvec->bv_page)
300 continue;
301
302 /*
303 * fromvec->bv_offset and fromvec->bv_len might have been
304 * modified by the block layer, so use the original copy,
305 * bounce_copy_vec already uses tovec->bv_len
306 */
307 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
308
309 flush_dcache_page(tovec->bv_page);
310 bounce_copy_vec(tovec, vfrom);
311 }
312}
313
314static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
315{
316 struct bio *bio_orig = bio->bi_private;
317 struct bio_vec *bvec, *org_vec;
318 int i;
319
320 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
321 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
322
323 /*
324 * free up bounce indirect pages used
325 */
326 __bio_for_each_segment(bvec, bio, i, 0) {
327 org_vec = bio_orig->bi_io_vec + i;
328 if (bvec->bv_page == org_vec->bv_page)
329 continue;
330
331 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
332 mempool_free(bvec->bv_page, pool);
333 }
334
335 bio_endio(bio_orig, bio_orig->bi_size, err);
336 bio_put(bio);
337}
338
339static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
340{
341 if (bio->bi_size)
342 return 1;
343
344 bounce_end_io(bio, page_pool, err);
345 return 0;
346}
347
348static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
349{
350 if (bio->bi_size)
351 return 1;
352
353 bounce_end_io(bio, isa_page_pool, err);
354 return 0;
355}
356
357static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
358{
359 struct bio *bio_orig = bio->bi_private;
360
361 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
362 copy_to_high_bio_irq(bio_orig, bio);
363
364 bounce_end_io(bio, pool, err);
365}
366
367static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
368{
369 if (bio->bi_size)
370 return 1;
371
372 __bounce_end_io_read(bio, page_pool, err);
373 return 0;
374}
375
376static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
377{
378 if (bio->bi_size)
379 return 1;
380
381 __bounce_end_io_read(bio, isa_page_pool, err);
382 return 0;
383}
384
385static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
386 mempool_t *pool)
387{
388 struct page *page;
389 struct bio *bio = NULL;
390 int i, rw = bio_data_dir(*bio_orig);
391 struct bio_vec *to, *from;
392
393 bio_for_each_segment(from, *bio_orig, i) {
394 page = from->bv_page;
395
396 /*
397 * is destination page below bounce pfn?
398 */
399 if (page_to_pfn(page) < q->bounce_pfn)
400 continue;
401
402 /*
403 * irk, bounce it
404 */
405 if (!bio)
406 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
407
408 to = bio->bi_io_vec + i;
409
410 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
411 to->bv_len = from->bv_len;
412 to->bv_offset = from->bv_offset;
413 inc_zone_page_state(to->bv_page, NR_BOUNCE);
414
415 if (rw == WRITE) {
416 char *vto, *vfrom;
417
418 flush_dcache_page(from->bv_page);
419 vto = page_address(to->bv_page) + to->bv_offset;
420 vfrom = kmap(from->bv_page) + from->bv_offset;
421 memcpy(vto, vfrom, to->bv_len);
422 kunmap(from->bv_page);
423 }
424 }
425
426 /*
427 * no pages bounced
428 */
429 if (!bio)
430 return;
431
432 /*
433 * at least one page was bounced, fill in possible non-highmem
434 * pages
435 */
436 __bio_for_each_segment(from, *bio_orig, i, 0) {
437 to = bio_iovec_idx(bio, i);
438 if (!to->bv_page) {
439 to->bv_page = from->bv_page;
440 to->bv_len = from->bv_len;
441 to->bv_offset = from->bv_offset;
442 }
443 }
444
445 bio->bi_bdev = (*bio_orig)->bi_bdev;
446 bio->bi_flags |= (1 << BIO_BOUNCED);
447 bio->bi_sector = (*bio_orig)->bi_sector;
448 bio->bi_rw = (*bio_orig)->bi_rw;
449
450 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
451 bio->bi_idx = (*bio_orig)->bi_idx;
452 bio->bi_size = (*bio_orig)->bi_size;
453
454 if (pool == page_pool) {
455 bio->bi_end_io = bounce_end_io_write;
456 if (rw == READ)
457 bio->bi_end_io = bounce_end_io_read;
458 } else {
459 bio->bi_end_io = bounce_end_io_write_isa;
460 if (rw == READ)
461 bio->bi_end_io = bounce_end_io_read_isa;
462 }
463
464 bio->bi_private = *bio_orig;
465 *bio_orig = bio;
466}
467
468void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
469{
470 mempool_t *pool;
471
472 /*
473 * for non-isa bounce case, just check if the bounce pfn is equal
474 * to or bigger than the highest pfn in the system -- in that case,
475 * don't waste time iterating over bio segments
476 */
477 if (!(q->bounce_gfp & GFP_DMA)) {
478 if (q->bounce_pfn >= blk_max_pfn)
479 return;
480 pool = page_pool;
481 } else {
482 BUG_ON(!isa_page_pool);
483 pool = isa_page_pool;
484 }
485
486 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
487
488 /*
489 * slow path
490 */
491 __blk_queue_bounce(q, bio_orig, pool);
492}
493
494EXPORT_SYMBOL(blk_queue_bounce);
495
496#if defined(HASHED_PAGE_VIRTUAL) 215#if defined(HASHED_PAGE_VIRTUAL)
497 216
498#define PA_HASH_ORDER 7 217#define PA_HASH_ORDER 7
diff --git a/mm/memory.c b/mm/memory.c
index 601159a46a..160f5b503e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1577,7 +1577,14 @@ gotten:
1577 entry = mk_pte(new_page, vma->vm_page_prot); 1577 entry = mk_pte(new_page, vma->vm_page_prot);
1578 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1578 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1579 lazy_mmu_prot_update(entry); 1579 lazy_mmu_prot_update(entry);
1580 ptep_establish(vma, address, page_table, entry); 1580 /*
1581 * Clear the pte entry and flush it first, before updating the
1582 * pte with the new entry. This will avoid a race condition
1583 * seen in the presence of one thread doing SMC and another
1584 * thread doing COW.
1585 */
1586 ptep_clear_flush(vma, address, page_table);
1587 set_pte_at(mm, address, page_table, entry);
1581 update_mmu_cache(vma, address, entry); 1588 update_mmu_cache(vma, address, entry);
1582 lru_cache_add_active(new_page); 1589 lru_cache_add_active(new_page);
1583 page_add_new_anon_rmap(new_page, vma, address); 1590 page_add_new_anon_rmap(new_page, vma, address);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c37319542b..2053bb165a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -13,6 +13,7 @@
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/pagevec.h> 15#include <linux/pagevec.h>
16#include <linux/writeback.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/sysctl.h> 18#include <linux/sysctl.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
@@ -21,6 +22,7 @@
21#include <linux/highmem.h> 22#include <linux/highmem.h>
22#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
23#include <linux/ioport.h> 24#include <linux/ioport.h>
25#include <linux/cpuset.h>
24 26
25#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
26 28
@@ -191,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
191 if (need_zonelists_rebuild) 193 if (need_zonelists_rebuild)
192 build_all_zonelists(); 194 build_all_zonelists();
193 vm_total_pages = nr_free_pagecache_pages(); 195 vm_total_pages = nr_free_pagecache_pages();
196 writeback_set_ratelimit();
194 return 0; 197 return 0;
195} 198}
196 199
@@ -283,6 +286,8 @@ int add_memory(int nid, u64 start, u64 size)
283 /* we online node here. we can't roll back from here. */ 286 /* we online node here. we can't roll back from here. */
284 node_set_online(nid); 287 node_set_online(nid);
285 288
289 cpuset_track_online_nodes();
290
286 if (new_pgdat) { 291 if (new_pgdat) {
287 ret = register_one_node(nid); 292 ret = register_one_node(nid);
288 /* 293 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 20a8c2687b..ba2453f948 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
409} 409}
410EXPORT_SYMBOL(migrate_page); 410EXPORT_SYMBOL(migrate_page);
411 411
412#ifdef CONFIG_BLOCK
412/* 413/*
413 * Migration function for pages with buffers. This function can only be used 414 * Migration function for pages with buffers. This function can only be used
414 * if the underlying filesystem guarantees that no other references to "page" 415 * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
466 return 0; 467 return 0;
467} 468}
468EXPORT_SYMBOL(buffer_migrate_page); 469EXPORT_SYMBOL(buffer_migrate_page);
470#endif
469 471
470/* 472/*
471 * Writeback a page to clean the dirty state 473 * Writeback a page to clean the dirty state
@@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping,
525 * Buffers may be managed in a filesystem specific way. 527 * Buffers may be managed in a filesystem specific way.
526 * We must have no buffers or drop them. 528 * We must have no buffers or drop them.
527 */ 529 */
528 if (page_has_buffers(page) && 530 if (PagePrivate(page) &&
529 !try_to_release_page(page, GFP_KERNEL)) 531 !try_to_release_page(page, GFP_KERNEL))
530 return -EAGAIN; 532 return -EAGAIN;
531 533
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bada3d0311..20f41b082e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -204,16 +204,30 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
204 do_posix_clock_monotonic_gettime(&uptime); 204 do_posix_clock_monotonic_gettime(&uptime);
205 do_each_thread(g, p) { 205 do_each_thread(g, p) {
206 unsigned long points; 206 unsigned long points;
207 int releasing;
208 207
209 /* skip kernel threads */ 208 /*
209 * skip kernel threads and tasks which have already released
210 * their mm.
211 */
210 if (!p->mm) 212 if (!p->mm)
211 continue; 213 continue;
212 /* skip the init task with pid == 1 */ 214 /* skip the init task */
213 if (p->pid == 1) 215 if (is_init(p))
214 continue; 216 continue;
215 217
216 /* 218 /*
219 * This task already has access to memory reserves and is
220 * being killed. Don't allow any other task access to the
221 * memory reserve.
222 *
223 * Note: this may have a chance of deadlock if it gets
224 * blocked waiting for another task which itself is waiting
225 * for memory. Is there a better alternative?
226 */
227 if (test_tsk_thread_flag(p, TIF_MEMDIE))
228 return ERR_PTR(-1UL);
229
230 /*
217 * This is in the process of releasing memory so wait for it 231 * This is in the process of releasing memory so wait for it
218 * to finish before killing some other task by mistake. 232 * to finish before killing some other task by mistake.
219 * 233 *
@@ -221,21 +235,16 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
221 * go ahead if it is exiting: this will simply set TIF_MEMDIE, 235 * go ahead if it is exiting: this will simply set TIF_MEMDIE,
222 * which will allow it to gain access to memory reserves in 236 * which will allow it to gain access to memory reserves in
223 * the process of exiting and releasing its resources. 237 * the process of exiting and releasing its resources.
224 * Otherwise we could get an OOM deadlock. 238 * Otherwise we could get an easy OOM deadlock.
225 */ 239 */
226 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || 240 if (p->flags & PF_EXITING) {
227 p->flags & PF_EXITING; 241 if (p != current)
228 if (releasing) { 242 return ERR_PTR(-1UL);
229 /* PF_DEAD tasks have already released their mm */ 243
230 if (p->flags & PF_DEAD) 244 chosen = p;
231 continue; 245 *ppoints = ULONG_MAX;
232 if (p->flags & PF_EXITING && p == current) {
233 chosen = p;
234 *ppoints = ULONG_MAX;
235 break;
236 }
237 return ERR_PTR(-1UL);
238 } 246 }
247
239 if (p->oomkilladj == OOM_DISABLE) 248 if (p->oomkilladj == OOM_DISABLE)
240 continue; 249 continue;
241 250
@@ -245,6 +254,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
245 *ppoints = points; 254 *ppoints = points;
246 } 255 }
247 } while_each_thread(g, p); 256 } while_each_thread(g, p);
257
248 return chosen; 258 return chosen;
249} 259}
250 260
@@ -255,20 +265,17 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
255 */ 265 */
256static void __oom_kill_task(struct task_struct *p, const char *message) 266static void __oom_kill_task(struct task_struct *p, const char *message)
257{ 267{
258 if (p->pid == 1) { 268 if (is_init(p)) {
259 WARN_ON(1); 269 WARN_ON(1);
260 printk(KERN_WARNING "tried to kill init!\n"); 270 printk(KERN_WARNING "tried to kill init!\n");
261 return; 271 return;
262 } 272 }
263 273
264 task_lock(p); 274 if (!p->mm) {
265 if (!p->mm || p->mm == &init_mm) {
266 WARN_ON(1); 275 WARN_ON(1);
267 printk(KERN_WARNING "tried to kill an mm-less task!\n"); 276 printk(KERN_WARNING "tried to kill an mm-less task!\n");
268 task_unlock(p);
269 return; 277 return;
270 } 278 }
271 task_unlock(p);
272 279
273 if (message) { 280 if (message) {
274 printk(KERN_ERR "%s: Killed process %d (%s).\n", 281 printk(KERN_ERR "%s: Killed process %d (%s).\n",
@@ -302,7 +309,7 @@ static int oom_kill_task(struct task_struct *p, const char *message)
302 * However, this is of no concern to us. 309 * However, this is of no concern to us.
303 */ 310 */
304 311
305 if (mm == NULL || mm == &init_mm) 312 if (mm == NULL)
306 return 1; 313 return 1;
307 314
308 __oom_kill_task(p, message); 315 __oom_kill_task(p, message);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 555752907d..c0d4ce144d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -30,6 +30,8 @@
30#include <linux/sysctl.h> 30#include <linux/sysctl.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/syscalls.h> 32#include <linux/syscalls.h>
33#include <linux/buffer_head.h>
34#include <linux/pagevec.h>
33 35
34/* 36/*
35 * The maximum number of pages to writeout in a single bdflush/kupdate 37 * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -46,7 +48,6 @@
46 */ 48 */
47static long ratelimit_pages = 32; 49static long ratelimit_pages = 32;
48 50
49static long total_pages; /* The total number of pages in the machine. */
50static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ 51static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */
51 52
52/* 53/*
@@ -126,7 +127,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
126 int unmapped_ratio; 127 int unmapped_ratio;
127 long background; 128 long background;
128 long dirty; 129 long dirty;
129 unsigned long available_memory = total_pages; 130 unsigned long available_memory = vm_total_pages;
130 struct task_struct *tsk; 131 struct task_struct *tsk;
131 132
132#ifdef CONFIG_HIGHMEM 133#ifdef CONFIG_HIGHMEM
@@ -141,7 +142,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
141 142
142 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + 143 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
143 global_page_state(NR_ANON_PAGES)) * 100) / 144 global_page_state(NR_ANON_PAGES)) * 100) /
144 total_pages; 145 vm_total_pages;
145 146
146 dirty_ratio = vm_dirty_ratio; 147 dirty_ratio = vm_dirty_ratio;
147 if (dirty_ratio > unmapped_ratio / 2) 148 if (dirty_ratio > unmapped_ratio / 2)
@@ -502,9 +503,9 @@ void laptop_sync_completion(void)
502 * will write six megabyte chunks, max. 503 * will write six megabyte chunks, max.
503 */ 504 */
504 505
505static void set_ratelimit(void) 506void writeback_set_ratelimit(void)
506{ 507{
507 ratelimit_pages = total_pages / (num_online_cpus() * 32); 508 ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
508 if (ratelimit_pages < 16) 509 if (ratelimit_pages < 16)
509 ratelimit_pages = 16; 510 ratelimit_pages = 16;
510 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) 511 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
@@ -514,7 +515,7 @@ static void set_ratelimit(void)
514static int __cpuinit 515static int __cpuinit
515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) 516ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
516{ 517{
517 set_ratelimit(); 518 writeback_set_ratelimit();
518 return 0; 519 return 0;
519} 520}
520 521
@@ -533,9 +534,7 @@ void __init page_writeback_init(void)
533 long buffer_pages = nr_free_buffer_pages(); 534 long buffer_pages = nr_free_buffer_pages();
534 long correction; 535 long correction;
535 536
536 total_pages = nr_free_pagecache_pages(); 537 correction = (100 * 4 * buffer_pages) / vm_total_pages;
537
538 correction = (100 * 4 * buffer_pages) / total_pages;
539 538
540 if (correction < 100) { 539 if (correction < 100) {
541 dirty_background_ratio *= correction; 540 dirty_background_ratio *= correction;
@@ -549,10 +548,143 @@ void __init page_writeback_init(void)
549 vm_dirty_ratio = 1; 548 vm_dirty_ratio = 1;
550 } 549 }
551 mod_timer(&wb_timer, jiffies + dirty_writeback_interval); 550 mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
552 set_ratelimit(); 551 writeback_set_ratelimit();
553 register_cpu_notifier(&ratelimit_nb); 552 register_cpu_notifier(&ratelimit_nb);
554} 553}
555 554
555/**
556 * generic_writepages - walk the list of dirty pages of the given
557 * address space and writepage() all of them.
558 *
559 * @mapping: address space structure to write
560 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
561 *
562 * This is a library function, which implements the writepages()
563 * address_space_operation.
564 *
565 * If a page is already under I/O, generic_writepages() skips it, even
566 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
567 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
568 * and msync() need to guarantee that all the data which was dirty at the time
569 * the call was made get new I/O started against them. If wbc->sync_mode is
570 * WB_SYNC_ALL then we were called for data integrity and we must wait for
571 * existing IO to complete.
572 *
573 * Derived from mpage_writepages() - if you fix this you should check that
574 * also!
575 */
576int generic_writepages(struct address_space *mapping,
577 struct writeback_control *wbc)
578{
579 struct backing_dev_info *bdi = mapping->backing_dev_info;
580 int ret = 0;
581 int done = 0;
582 int (*writepage)(struct page *page, struct writeback_control *wbc);
583 struct pagevec pvec;
584 int nr_pages;
585 pgoff_t index;
586 pgoff_t end; /* Inclusive */
587 int scanned = 0;
588 int range_whole = 0;
589
590 if (wbc->nonblocking && bdi_write_congested(bdi)) {
591 wbc->encountered_congestion = 1;
592 return 0;
593 }
594
595 writepage = mapping->a_ops->writepage;
596
597 /* deal with chardevs and other special files */
598 if (!writepage)
599 return 0;
600
601 pagevec_init(&pvec, 0);
602 if (wbc->range_cyclic) {
603 index = mapping->writeback_index; /* Start from prev offset */
604 end = -1;
605 } else {
606 index = wbc->range_start >> PAGE_CACHE_SHIFT;
607 end = wbc->range_end >> PAGE_CACHE_SHIFT;
608 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
609 range_whole = 1;
610 scanned = 1;
611 }
612retry:
613 while (!done && (index <= end) &&
614 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
615 PAGECACHE_TAG_DIRTY,
616 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
617 unsigned i;
618
619 scanned = 1;
620 for (i = 0; i < nr_pages; i++) {
621 struct page *page = pvec.pages[i];
622
623 /*
624 * At this point we hold neither mapping->tree_lock nor
625 * lock on the page itself: the page may be truncated or
626 * invalidated (changing page->mapping to NULL), or even
627 * swizzled back from swapper_space to tmpfs file
628 * mapping
629 */
630 lock_page(page);
631
632 if (unlikely(page->mapping != mapping)) {
633 unlock_page(page);
634 continue;
635 }
636
637 if (!wbc->range_cyclic && page->index > end) {
638 done = 1;
639 unlock_page(page);
640 continue;
641 }
642
643 if (wbc->sync_mode != WB_SYNC_NONE)
644 wait_on_page_writeback(page);
645
646 if (PageWriteback(page) ||
647 !clear_page_dirty_for_io(page)) {
648 unlock_page(page);
649 continue;
650 }
651
652 ret = (*writepage)(page, wbc);
653 if (ret) {
654 if (ret == -ENOSPC)
655 set_bit(AS_ENOSPC, &mapping->flags);
656 else
657 set_bit(AS_EIO, &mapping->flags);
658 }
659
660 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
661 unlock_page(page);
662 if (ret || (--(wbc->nr_to_write) <= 0))
663 done = 1;
664 if (wbc->nonblocking && bdi_write_congested(bdi)) {
665 wbc->encountered_congestion = 1;
666 done = 1;
667 }
668 }
669 pagevec_release(&pvec);
670 cond_resched();
671 }
672 if (!scanned && !done) {
673 /*
674 * We hit the last page and there is more work to be done: wrap
675 * back to the start of the file
676 */
677 scanned = 1;
678 index = 0;
679 goto retry;
680 }
681 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
682 mapping->writeback_index = index;
683 return ret;
684}
685
686EXPORT_SYMBOL(generic_writepages);
687
556int do_writepages(struct address_space *mapping, struct writeback_control *wbc) 688int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
557{ 689{
558 int ret; 690 int ret;
@@ -675,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
675 807
676 if (likely(mapping)) { 808 if (likely(mapping)) {
677 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; 809 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
678 if (spd) 810#ifdef CONFIG_BLOCK
679 return (*spd)(page); 811 if (!spd)
680 return __set_page_dirty_buffers(page); 812 spd = __set_page_dirty_buffers;
813#endif
814 return (*spd)(page);
681 } 815 }
682 if (!PageDirty(page)) { 816 if (!PageDirty(page)) {
683 if (!TestSetPageDirty(page)) 817 if (!TestSetPageDirty(page))
diff --git a/mm/shmem.c b/mm/shmem.c
index eda907c3a8..b96de69f23 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -26,6 +26,8 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/xattr.h>
30#include <linux/generic_acl.h>
29#include <linux/mm.h> 31#include <linux/mm.h>
30#include <linux/mman.h> 32#include <linux/mman.h>
31#include <linux/file.h> 33#include <linux/file.h>
@@ -177,6 +179,7 @@ static const struct address_space_operations shmem_aops;
177static struct file_operations shmem_file_operations; 179static struct file_operations shmem_file_operations;
178static struct inode_operations shmem_inode_operations; 180static struct inode_operations shmem_inode_operations;
179static struct inode_operations shmem_dir_inode_operations; 181static struct inode_operations shmem_dir_inode_operations;
182static struct inode_operations shmem_special_inode_operations;
180static struct vm_operations_struct shmem_vm_ops; 183static struct vm_operations_struct shmem_vm_ops;
181 184
182static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 185static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
@@ -637,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
637 struct page *page = NULL; 640 struct page *page = NULL;
638 int error; 641 int error;
639 642
640 if (attr->ia_valid & ATTR_SIZE) { 643 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
641 if (attr->ia_size < inode->i_size) { 644 if (attr->ia_size < inode->i_size) {
642 /* 645 /*
643 * If truncating down to a partial page, then 646 * If truncating down to a partial page, then
@@ -670,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
670 error = inode_change_ok(inode, attr); 673 error = inode_change_ok(inode, attr);
671 if (!error) 674 if (!error)
672 error = inode_setattr(inode, attr); 675 error = inode_setattr(inode, attr);
676#ifdef CONFIG_TMPFS_POSIX_ACL
677 if (!error && (attr->ia_valid & ATTR_MODE))
678 error = generic_acl_chmod(inode, &shmem_acl_ops);
679#endif
673 if (page) 680 if (page)
674 page_cache_release(page); 681 page_cache_release(page);
675 return error; 682 return error;
@@ -1362,6 +1369,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1362 1369
1363 switch (mode & S_IFMT) { 1370 switch (mode & S_IFMT) {
1364 default: 1371 default:
1372 inode->i_op = &shmem_special_inode_operations;
1365 init_special_inode(inode, mode, dev); 1373 init_special_inode(inode, mode, dev);
1366 break; 1374 break;
1367 case S_IFREG: 1375 case S_IFREG:
@@ -1682,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1682 iput(inode); 1690 iput(inode);
1683 return error; 1691 return error;
1684 } 1692 }
1685 error = 0; 1693 }
1694 error = shmem_acl_init(inode, dir);
1695 if (error) {
1696 iput(inode);
1697 return error;
1686 } 1698 }
1687 if (dir->i_mode & S_ISGID) { 1699 if (dir->i_mode & S_ISGID) {
1688 inode->i_gid = dir->i_gid; 1700 inode->i_gid = dir->i_gid;
@@ -1897,6 +1909,53 @@ static struct inode_operations shmem_symlink_inode_operations = {
1897 .put_link = shmem_put_link, 1909 .put_link = shmem_put_link,
1898}; 1910};
1899 1911
1912#ifdef CONFIG_TMPFS_POSIX_ACL
1913/**
1914 * Superblocks without xattr inode operations will get security.* xattr
1915 * support from the VFS "for free". As soon as we have any other xattrs
1916 * like ACLs, we also need to implement the security.* handlers at
1917 * filesystem level, though.
1918 */
1919
1920static size_t shmem_xattr_security_list(struct inode *inode, char *list,
1921 size_t list_len, const char *name,
1922 size_t name_len)
1923{
1924 return security_inode_listsecurity(inode, list, list_len);
1925}
1926
1927static int shmem_xattr_security_get(struct inode *inode, const char *name,
1928 void *buffer, size_t size)
1929{
1930 if (strcmp(name, "") == 0)
1931 return -EINVAL;
1932 return security_inode_getsecurity(inode, name, buffer, size,
1933 -EOPNOTSUPP);
1934}
1935
1936static int shmem_xattr_security_set(struct inode *inode, const char *name,
1937 const void *value, size_t size, int flags)
1938{
1939 if (strcmp(name, "") == 0)
1940 return -EINVAL;
1941 return security_inode_setsecurity(inode, name, value, size, flags);
1942}
1943
1944struct xattr_handler shmem_xattr_security_handler = {
1945 .prefix = XATTR_SECURITY_PREFIX,
1946 .list = shmem_xattr_security_list,
1947 .get = shmem_xattr_security_get,
1948 .set = shmem_xattr_security_set,
1949};
1950
1951static struct xattr_handler *shmem_xattr_handlers[] = {
1952 &shmem_xattr_acl_access_handler,
1953 &shmem_xattr_acl_default_handler,
1954 &shmem_xattr_security_handler,
1955 NULL
1956};
1957#endif
1958
1900static int shmem_parse_options(char *options, int *mode, uid_t *uid, 1959static int shmem_parse_options(char *options, int *mode, uid_t *uid,
1901 gid_t *gid, unsigned long *blocks, unsigned long *inodes, 1960 gid_t *gid, unsigned long *blocks, unsigned long *inodes,
1902 int *policy, nodemask_t *policy_nodes) 1961 int *policy, nodemask_t *policy_nodes)
@@ -2094,6 +2153,10 @@ static int shmem_fill_super(struct super_block *sb,
2094 sb->s_magic = TMPFS_MAGIC; 2153 sb->s_magic = TMPFS_MAGIC;
2095 sb->s_op = &shmem_ops; 2154 sb->s_op = &shmem_ops;
2096 sb->s_time_gran = 1; 2155 sb->s_time_gran = 1;
2156#ifdef CONFIG_TMPFS_POSIX_ACL
2157 sb->s_xattr = shmem_xattr_handlers;
2158 sb->s_flags |= MS_POSIXACL;
2159#endif
2097 2160
2098 inode = shmem_get_inode(sb, S_IFDIR | mode, 0); 2161 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
2099 if (!inode) 2162 if (!inode)
@@ -2130,6 +2193,7 @@ static void shmem_destroy_inode(struct inode *inode)
2130 /* only struct inode is valid if it's an inline symlink */ 2193 /* only struct inode is valid if it's an inline symlink */
2131 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2194 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2132 } 2195 }
2196 shmem_acl_destroy_inode(inode);
2133 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 2197 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2134} 2198}
2135 2199
@@ -2141,6 +2205,10 @@ static void init_once(void *foo, struct kmem_cache *cachep,
2141 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 2205 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2142 SLAB_CTOR_CONSTRUCTOR) { 2206 SLAB_CTOR_CONSTRUCTOR) {
2143 inode_init_once(&p->vfs_inode); 2207 inode_init_once(&p->vfs_inode);
2208#ifdef CONFIG_TMPFS_POSIX_ACL
2209 p->i_acl = NULL;
2210 p->i_default_acl = NULL;
2211#endif
2144 } 2212 }
2145} 2213}
2146 2214
@@ -2184,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = {
2184 .truncate = shmem_truncate, 2252 .truncate = shmem_truncate,
2185 .setattr = shmem_notify_change, 2253 .setattr = shmem_notify_change,
2186 .truncate_range = shmem_truncate_range, 2254 .truncate_range = shmem_truncate_range,
2255#ifdef CONFIG_TMPFS_POSIX_ACL
2256 .setxattr = generic_setxattr,
2257 .getxattr = generic_getxattr,
2258 .listxattr = generic_listxattr,
2259 .removexattr = generic_removexattr,
2260 .permission = shmem_permission,
2261#endif
2262
2187}; 2263};
2188 2264
2189static struct inode_operations shmem_dir_inode_operations = { 2265static struct inode_operations shmem_dir_inode_operations = {
@@ -2198,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = {
2198 .mknod = shmem_mknod, 2274 .mknod = shmem_mknod,
2199 .rename = shmem_rename, 2275 .rename = shmem_rename,
2200#endif 2276#endif
2277#ifdef CONFIG_TMPFS_POSIX_ACL
2278 .setattr = shmem_notify_change,
2279 .setxattr = generic_setxattr,
2280 .getxattr = generic_getxattr,
2281 .listxattr = generic_listxattr,
2282 .removexattr = generic_removexattr,
2283 .permission = shmem_permission,
2284#endif
2285};
2286
2287static struct inode_operations shmem_special_inode_operations = {
2288#ifdef CONFIG_TMPFS_POSIX_ACL
2289 .setattr = shmem_notify_change,
2290 .setxattr = generic_setxattr,
2291 .getxattr = generic_getxattr,
2292 .listxattr = generic_listxattr,
2293 .removexattr = generic_removexattr,
2294 .permission = shmem_permission,
2295#endif
2201}; 2296};
2202 2297
2203static struct super_operations shmem_ops = { 2298static struct super_operations shmem_ops = {
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
new file mode 100644
index 0000000000..c946bf4687
--- /dev/null
+++ b/mm/shmem_acl.c
@@ -0,0 +1,197 @@
1/*
2 * mm/shmem_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/fs.h>
10#include <linux/shmem_fs.h>
11#include <linux/xattr.h>
12#include <linux/generic_acl.h>
13
14/**
15 * shmem_get_acl - generic_acl_operations->getacl() operation
16 */
17static struct posix_acl *
18shmem_get_acl(struct inode *inode, int type)
19{
20 struct posix_acl *acl = NULL;
21
22 spin_lock(&inode->i_lock);
23 switch(type) {
24 case ACL_TYPE_ACCESS:
25 acl = posix_acl_dup(SHMEM_I(inode)->i_acl);
26 break;
27
28 case ACL_TYPE_DEFAULT:
29 acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl);
30 break;
31 }
32 spin_unlock(&inode->i_lock);
33
34 return acl;
35}
36
37/**
38 * shmem_set_acl - generic_acl_operations->setacl() operation
39 */
40static void
41shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
42{
43 struct posix_acl *free = NULL;
44
45 spin_lock(&inode->i_lock);
46 switch(type) {
47 case ACL_TYPE_ACCESS:
48 free = SHMEM_I(inode)->i_acl;
49 SHMEM_I(inode)->i_acl = posix_acl_dup(acl);
50 break;
51
52 case ACL_TYPE_DEFAULT:
53 free = SHMEM_I(inode)->i_default_acl;
54 SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl);
55 break;
56 }
57 spin_unlock(&inode->i_lock);
58 posix_acl_release(free);
59}
60
61struct generic_acl_operations shmem_acl_ops = {
62 .getacl = shmem_get_acl,
63 .setacl = shmem_set_acl,
64};
65
66/**
67 * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
68 * shmem_xattr_acl_access_handler - plumbing code to implement the
69 * system.posix_acl_access xattr using the generic acl functions.
70 */
71
72static size_t
73shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
74 const char *name, size_t name_len)
75{
76 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
77 list, list_size);
78}
79
80static int
81shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
82 size_t size)
83{
84 if (strcmp(name, "") != 0)
85 return -EINVAL;
86 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
87 size);
88}
89
90static int
91shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
92 size_t size, int flags)
93{
94 if (strcmp(name, "") != 0)
95 return -EINVAL;
96 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
97 size);
98}
99
100struct xattr_handler shmem_xattr_acl_access_handler = {
101 .prefix = POSIX_ACL_XATTR_ACCESS,
102 .list = shmem_list_acl_access,
103 .get = shmem_get_acl_access,
104 .set = shmem_set_acl_access,
105};
106
107/**
108 * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
109 * shmem_xattr_acl_default_handler - plumbing code to implement the
110 * system.posix_acl_default xattr using the generic acl functions.
111 */
112
113static size_t
114shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
115 const char *name, size_t name_len)
116{
117 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
118 list, list_size);
119}
120
121static int
122shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
123 size_t size)
124{
125 if (strcmp(name, "") != 0)
126 return -EINVAL;
127 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
128 size);
129}
130
131static int
132shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
133 size_t size, int flags)
134{
135 if (strcmp(name, "") != 0)
136 return -EINVAL;
137 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
138 size);
139}
140
141struct xattr_handler shmem_xattr_acl_default_handler = {
142 .prefix = POSIX_ACL_XATTR_DEFAULT,
143 .list = shmem_list_acl_default,
144 .get = shmem_get_acl_default,
145 .set = shmem_set_acl_default,
146};
147
148/**
149 * shmem_acl_init - Initialize the acl(s) of a new inode
150 */
151int
152shmem_acl_init(struct inode *inode, struct inode *dir)
153{
154 return generic_acl_init(inode, dir, &shmem_acl_ops);
155}
156
157/**
158 * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode
159 *
160 * This is done before destroying the actual inode.
161 */
162
163void
164shmem_acl_destroy_inode(struct inode *inode)
165{
166 if (SHMEM_I(inode)->i_acl)
167 posix_acl_release(SHMEM_I(inode)->i_acl);
168 SHMEM_I(inode)->i_acl = NULL;
169 if (SHMEM_I(inode)->i_default_acl)
170 posix_acl_release(SHMEM_I(inode)->i_default_acl);
171 SHMEM_I(inode)->i_default_acl = NULL;
172}
173
174/**
175 * shmem_check_acl - check_acl() callback for generic_permission()
176 */
177static int
178shmem_check_acl(struct inode *inode, int mask)
179{
180 struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
181
182 if (acl) {
183 int error = posix_acl_permission(inode, acl, mask);
184 posix_acl_release(acl);
185 return error;
186 }
187 return -EAGAIN;
188}
189
190/**
191 * shmem_permission - permission() inode operation
192 */
193int
194shmem_permission(struct inode *inode, int mask, struct nameidata *nd)
195{
196 return generic_permission(inode, mask, shmem_check_acl);
197}
diff --git a/mm/slab.c b/mm/slab.c
index 792bfe320a..3dbd6f4e74 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1683,10 +1683,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1683static void dump_line(char *data, int offset, int limit) 1683static void dump_line(char *data, int offset, int limit)
1684{ 1684{
1685 int i; 1685 int i;
1686 unsigned char error = 0;
1687 int bad_count = 0;
1688
1686 printk(KERN_ERR "%03x:", offset); 1689 printk(KERN_ERR "%03x:", offset);
1687 for (i = 0; i < limit; i++) 1690 for (i = 0; i < limit; i++) {
1691 if (data[offset + i] != POISON_FREE) {
1692 error = data[offset + i];
1693 bad_count++;
1694 }
1688 printk(" %02x", (unsigned char)data[offset + i]); 1695 printk(" %02x", (unsigned char)data[offset + i]);
1696 }
1689 printk("\n"); 1697 printk("\n");
1698
1699 if (bad_count == 1) {
1700 error ^= POISON_FREE;
1701 if (!(error & (error - 1))) {
1702 printk(KERN_ERR "Single bit error detected. Probably "
1703 "bad RAM.\n");
1704#ifdef CONFIG_X86
1705 printk(KERN_ERR "Run memtest86+ or a similar memory "
1706 "test tool.\n");
1707#else
1708 printk(KERN_ERR "Run a memory test tool.\n");
1709#endif
1710 }
1711 }
1690} 1712}
1691#endif 1713#endif
1692 1714
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f1f5ec7837..a15def63f2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1723,13 +1723,14 @@ get_swap_info_struct(unsigned type)
1723 */ 1723 */
1724int valid_swaphandles(swp_entry_t entry, unsigned long *offset) 1724int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1725{ 1725{
1726 int ret = 0, i = 1 << page_cluster; 1726 int our_page_cluster = page_cluster;
1727 int ret = 0, i = 1 << our_page_cluster;
1727 unsigned long toff; 1728 unsigned long toff;
1728 struct swap_info_struct *swapdev = swp_type(entry) + swap_info; 1729 struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
1729 1730
1730 if (!page_cluster) /* no readahead */ 1731 if (!our_page_cluster) /* no readahead */
1731 return 0; 1732 return 0;
1732 toff = (swp_offset(entry) >> page_cluster) << page_cluster; 1733 toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
1733 if (!toff) /* first page is swap header */ 1734 if (!toff) /* first page is swap header */
1734 toff++, i--; 1735 toff++, i--;
1735 *offset = toff; 1736 *offset = toff;
diff --git a/mm/truncate.c b/mm/truncate.c
index a654928323..8fde658065 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -17,6 +17,32 @@
17 do_invalidatepage */ 17 do_invalidatepage */
18 18
19 19
20/**
21 * do_invalidatepage - invalidate part or all of a page
22 * @page: the page which is affected
23 * @offset: the index of the truncation point
24 *
25 * do_invalidatepage() is called when all or part of the page has become
26 * invalidated by a truncate operation.
27 *
28 * do_invalidatepage() does not have to release all buffers, but it must
29 * ensure that no dirty buffer is left outside @offset and that no I/O
30 * is underway against any of the blocks which are outside the truncation
31 * point, because the caller is about to free (and possibly reuse) those
32 * blocks on-disk.
33 */
34void do_invalidatepage(struct page *page, unsigned long offset)
35{
36 void (*invalidatepage)(struct page *, unsigned long);
37 invalidatepage = page->mapping->a_ops->invalidatepage;
38#ifdef CONFIG_BLOCK
39 if (!invalidatepage)
40 invalidatepage = block_invalidatepage;
41#endif
42 if (invalidatepage)
43 (*invalidatepage)(page, offset);
44}
45
20static inline void truncate_partial_page(struct page *page, unsigned partial) 46static inline void truncate_partial_page(struct page *page, unsigned partial)
21{ 47{
22 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 48 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);