aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2008-07-15 01:44:51 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2008-07-15 01:44:51 -0400
commit43d2548bb2ef7e6d753f91468a746784041e522d (patch)
tree77d13fcd48fd998393abb825ec36e2b732684a73 /fs
parent585583d95c5660973bc0cf64add517b040acd8a4 (diff)
parent85082fd7cbe3173198aac0eb5e85ab1edcc6352c (diff)
Merge commit '85082fd7cbe3173198aac0eb5e85ab1edcc6352c' into test-build
Manual fixup of: arch/powerpc/Kconfig
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/bio-integrity.c719
-rw-r--r--fs/bio.c88
-rw-r--r--fs/char_dev.c7
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/dlm/user.c9
-rw-r--r--fs/ecryptfs/file.c3
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/fcntl.c3
-rw-r--r--fs/gfs2/ops_file.c4
-rw-r--r--fs/msdos/namei.c35
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/ncpfs/file.c12
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/ocfs2/stack_user.c3
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/proc_misc.c16
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/ramfs/file-mmu.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/read_write.c38
-rw-r--r--fs/smbfs/file.c11
-rw-r--r--fs/splice.c17
-rw-r--r--fs/vfat/namei.c35
29 files changed, 938 insertions, 144 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2694648cbd1b..313b2e06ded5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -930,7 +930,7 @@ config PROC_KCORE
930 930
931config PROC_VMCORE 931config PROC_VMCORE
932 bool "/proc/vmcore support (EXPERIMENTAL)" 932 bool "/proc/vmcore support (EXPERIMENTAL)"
933 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 933 depends on PROC_FS && CRASH_DUMP
934 default y 934 default y
935 help 935 help
936 Exports the dump image of crashed kernel in ELF format. 936 Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11bd4da1..277b079dec9e 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,7 @@ else
19obj-y += no-block.o 19obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
22obj-$(CONFIG_INOTIFY) += inotify.o 23obj-$(CONFIG_INOTIFY) += inotify.o
23obj-$(CONFIG_INOTIFY_USER) += inotify_user.o 24obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
24obj-$(CONFIG_EPOLL) += eventpoll.o 25obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
new file mode 100644
index 000000000000..63e2ee63058d
--- /dev/null
+++ b/fs/bio-integrity.c
@@ -0,0 +1,719 @@
1/*
2 * bio-integrity.c - bio data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/workqueue.h>
27
28static struct kmem_cache *bio_integrity_slab __read_mostly;
29static struct workqueue_struct *kintegrityd_wq;
30
31/**
32 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
33 * @bio: bio to attach integrity metadata to
34 * @gfp_mask: Memory allocation mask
35 * @nr_vecs: Number of integrity metadata scatter-gather elements
36 * @bs: bio_set to allocate from
37 *
38 * Description: This function prepares a bio for attaching integrity
39 * metadata. nr_vecs specifies the maximum number of pages containing
40 * integrity metadata that can be attached.
41 */
42struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
43 gfp_t gfp_mask,
44 unsigned int nr_vecs,
45 struct bio_set *bs)
46{
47 struct bio_integrity_payload *bip;
48 struct bio_vec *iv;
49 unsigned long idx;
50
51 BUG_ON(bio == NULL);
52
53 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
54 if (unlikely(bip == NULL)) {
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
56 return NULL;
57 }
58
59 memset(bip, 0, sizeof(*bip));
60
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
62 if (unlikely(iv == NULL)) {
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
64 mempool_free(bip, bs->bio_integrity_pool);
65 return NULL;
66 }
67
68 bip->bip_pool = idx;
69 bip->bip_vec = iv;
70 bip->bip_bio = bio;
71 bio->bi_integrity = bip;
72
73 return bip;
74}
75EXPORT_SYMBOL(bio_integrity_alloc_bioset);
76
77/**
78 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
79 * @bio: bio to attach integrity metadata to
80 * @gfp_mask: Memory allocation mask
81 * @nr_vecs: Number of integrity metadata scatter-gather elements
82 *
83 * Description: This function prepares a bio for attaching integrity
84 * metadata. nr_vecs specifies the maximum number of pages containing
85 * integrity metadata that can be attached.
86 */
87struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
88 gfp_t gfp_mask,
89 unsigned int nr_vecs)
90{
91 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
92}
93EXPORT_SYMBOL(bio_integrity_alloc);
94
95/**
96 * bio_integrity_free - Free bio integrity payload
97 * @bio: bio containing bip to be freed
98 * @bs: bio_set this bio was allocated from
99 *
100 * Description: Used to free the integrity portion of a bio. Usually
101 * called from bio_free().
102 */
103void bio_integrity_free(struct bio *bio, struct bio_set *bs)
104{
105 struct bio_integrity_payload *bip = bio->bi_integrity;
106
107 BUG_ON(bip == NULL);
108
109 /* A cloned bio doesn't own the integrity metadata */
110 if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
111 kfree(bip->bip_buf);
112
113 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
114 mempool_free(bip, bs->bio_integrity_pool);
115
116 bio->bi_integrity = NULL;
117}
118EXPORT_SYMBOL(bio_integrity_free);
119
120/**
121 * bio_integrity_add_page - Attach integrity metadata
122 * @bio: bio to update
123 * @page: page containing integrity metadata
124 * @len: number of bytes of integrity metadata in page
125 * @offset: start offset within page
126 *
127 * Description: Attach a page containing integrity metadata to bio.
128 */
129int bio_integrity_add_page(struct bio *bio, struct page *page,
130 unsigned int len, unsigned int offset)
131{
132 struct bio_integrity_payload *bip = bio->bi_integrity;
133 struct bio_vec *iv;
134
135 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
136 printk(KERN_ERR "%s: bip_vec full\n", __func__);
137 return 0;
138 }
139
140 iv = bip_vec_idx(bip, bip->bip_vcnt);
141 BUG_ON(iv == NULL);
142 BUG_ON(iv->bv_page != NULL);
143
144 iv->bv_page = page;
145 iv->bv_len = len;
146 iv->bv_offset = offset;
147 bip->bip_vcnt++;
148
149 return len;
150}
151EXPORT_SYMBOL(bio_integrity_add_page);
152
153/**
154 * bio_integrity_enabled - Check whether integrity can be passed
155 * @bio: bio to check
156 *
157 * Description: Determines whether bio_integrity_prep() can be called
158 * on this bio or not. bio data direction and target device must be
159 * set prior to calling. The functions honors the write_generate and
160 * read_verify flags in sysfs.
161 */
162int bio_integrity_enabled(struct bio *bio)
163{
164 /* Already protected? */
165 if (bio_integrity(bio))
166 return 0;
167
168 return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
169}
170EXPORT_SYMBOL(bio_integrity_enabled);
171
172/**
173 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
174 * @bi: blk_integrity profile for device
175 * @sectors: Number of 512 sectors to convert
176 *
177 * Description: The block layer calculates everything in 512 byte
178 * sectors but integrity metadata is done in terms of the hardware
179 * sector size of the storage device. Convert the block layer sectors
180 * to physical sectors.
181 */
182static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
183 unsigned int sectors)
184{
185 /* At this point there are only 512b or 4096b DIF/EPP devices */
186 if (bi->sector_size == 4096)
187 return sectors >>= 3;
188
189 return sectors;
190}
191
192/**
193 * bio_integrity_tag_size - Retrieve integrity tag space
194 * @bio: bio to inspect
195 *
196 * Description: Returns the maximum number of tag bytes that can be
197 * attached to this bio. Filesystems can use this to determine how
198 * much metadata to attach to an I/O.
199 */
200unsigned int bio_integrity_tag_size(struct bio *bio)
201{
202 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
203
204 BUG_ON(bio->bi_size == 0);
205
206 return bi->tag_size * (bio->bi_size / bi->sector_size);
207}
208EXPORT_SYMBOL(bio_integrity_tag_size);
209
210int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
211{
212 struct bio_integrity_payload *bip = bio->bi_integrity;
213 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
214 unsigned int nr_sectors;
215
216 BUG_ON(bip->bip_buf == NULL);
217
218 if (bi->tag_size == 0)
219 return -1;
220
221 nr_sectors = bio_integrity_hw_sectors(bi,
222 DIV_ROUND_UP(len, bi->tag_size));
223
224 if (nr_sectors * bi->tuple_size > bip->bip_size) {
225 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
226 __func__, nr_sectors * bi->tuple_size, bip->bip_size);
227 return -1;
228 }
229
230 if (set)
231 bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
232 else
233 bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
234
235 return 0;
236}
237
238/**
239 * bio_integrity_set_tag - Attach a tag buffer to a bio
240 * @bio: bio to attach buffer to
241 * @tag_buf: Pointer to a buffer containing tag data
242 * @len: Length of the included buffer
243 *
244 * Description: Use this function to tag a bio by leveraging the extra
245 * space provided by devices formatted with integrity protection. The
246 * size of the integrity buffer must be <= to the size reported by
247 * bio_integrity_tag_size().
248 */
249int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
250{
251 BUG_ON(bio_data_dir(bio) != WRITE);
252
253 return bio_integrity_tag(bio, tag_buf, len, 1);
254}
255EXPORT_SYMBOL(bio_integrity_set_tag);
256
257/**
258 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
259 * @bio: bio to retrieve buffer from
260 * @tag_buf: Pointer to a buffer for the tag data
261 * @len: Length of the target buffer
262 *
263 * Description: Use this function to retrieve the tag buffer from a
264 * completed I/O. The size of the integrity buffer must be <= to the
265 * size reported by bio_integrity_tag_size().
266 */
267int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
268{
269 BUG_ON(bio_data_dir(bio) != READ);
270
271 return bio_integrity_tag(bio, tag_buf, len, 0);
272}
273EXPORT_SYMBOL(bio_integrity_get_tag);
274
275/**
276 * bio_integrity_generate - Generate integrity metadata for a bio
277 * @bio: bio to generate integrity metadata for
278 *
279 * Description: Generates integrity metadata for a bio by calling the
280 * block device's generation callback function. The bio must have a
281 * bip attached with enough room to accommodate the generated
282 * integrity metadata.
283 */
284static void bio_integrity_generate(struct bio *bio)
285{
286 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
287 struct blk_integrity_exchg bix;
288 struct bio_vec *bv;
289 sector_t sector = bio->bi_sector;
290 unsigned int i, sectors, total;
291 void *prot_buf = bio->bi_integrity->bip_buf;
292
293 total = 0;
294 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
295 bix.sector_size = bi->sector_size;
296
297 bio_for_each_segment(bv, bio, i) {
298 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
299 bix.data_buf = kaddr + bv->bv_offset;
300 bix.data_size = bv->bv_len;
301 bix.prot_buf = prot_buf;
302 bix.sector = sector;
303
304 bi->generate_fn(&bix);
305
306 sectors = bv->bv_len / bi->sector_size;
307 sector += sectors;
308 prot_buf += sectors * bi->tuple_size;
309 total += sectors * bi->tuple_size;
310 BUG_ON(total > bio->bi_integrity->bip_size);
311
312 kunmap_atomic(kaddr, KM_USER0);
313 }
314}
315
316/**
317 * bio_integrity_prep - Prepare bio for integrity I/O
318 * @bio: bio to prepare
319 *
320 * Description: Allocates a buffer for integrity metadata, maps the
321 * pages and attaches them to a bio. The bio must have data
322 * direction, target device and start sector set priot to calling. In
323 * the WRITE case, integrity metadata will be generated using the
324 * block device's integrity function. In the READ case, the buffer
325 * will be prepared for DMA and a suitable end_io handler set up.
326 */
327int bio_integrity_prep(struct bio *bio)
328{
329 struct bio_integrity_payload *bip;
330 struct blk_integrity *bi;
331 struct request_queue *q;
332 void *buf;
333 unsigned long start, end;
334 unsigned int len, nr_pages;
335 unsigned int bytes, offset, i;
336 unsigned int sectors;
337
338 bi = bdev_get_integrity(bio->bi_bdev);
339 q = bdev_get_queue(bio->bi_bdev);
340 BUG_ON(bi == NULL);
341 BUG_ON(bio_integrity(bio));
342
343 sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
344
345 /* Allocate kernel buffer for protection data */
346 len = sectors * blk_integrity_tuple_size(bi);
347 buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
348 if (unlikely(buf == NULL)) {
349 printk(KERN_ERR "could not allocate integrity buffer\n");
350 return -EIO;
351 }
352
353 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
354 start = ((unsigned long) buf) >> PAGE_SHIFT;
355 nr_pages = end - start;
356
357 /* Allocate bio integrity payload and integrity vectors */
358 bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
359 if (unlikely(bip == NULL)) {
360 printk(KERN_ERR "could not allocate data integrity bioset\n");
361 kfree(buf);
362 return -EIO;
363 }
364
365 bip->bip_buf = buf;
366 bip->bip_size = len;
367 bip->bip_sector = bio->bi_sector;
368
369 /* Map it */
370 offset = offset_in_page(buf);
371 for (i = 0 ; i < nr_pages ; i++) {
372 int ret;
373 bytes = PAGE_SIZE - offset;
374
375 if (len <= 0)
376 break;
377
378 if (bytes > len)
379 bytes = len;
380
381 ret = bio_integrity_add_page(bio, virt_to_page(buf),
382 bytes, offset);
383
384 if (ret == 0)
385 return 0;
386
387 if (ret < bytes)
388 break;
389
390 buf += bytes;
391 len -= bytes;
392 offset = 0;
393 }
394
395 /* Install custom I/O completion handler if read verify is enabled */
396 if (bio_data_dir(bio) == READ) {
397 bip->bip_end_io = bio->bi_end_io;
398 bio->bi_end_io = bio_integrity_endio;
399 }
400
401 /* Auto-generate integrity metadata if this is a write */
402 if (bio_data_dir(bio) == WRITE)
403 bio_integrity_generate(bio);
404
405 return 0;
406}
407EXPORT_SYMBOL(bio_integrity_prep);
408
409/**
410 * bio_integrity_verify - Verify integrity metadata for a bio
411 * @bio: bio to verify
412 *
413 * Description: This function is called to verify the integrity of a
414 * bio. The data in the bio io_vec is compared to the integrity
415 * metadata returned by the HBA.
416 */
417static int bio_integrity_verify(struct bio *bio)
418{
419 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
420 struct blk_integrity_exchg bix;
421 struct bio_vec *bv;
422 sector_t sector = bio->bi_integrity->bip_sector;
423 unsigned int i, sectors, total, ret;
424 void *prot_buf = bio->bi_integrity->bip_buf;
425
426 ret = total = 0;
427 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
428 bix.sector_size = bi->sector_size;
429
430 bio_for_each_segment(bv, bio, i) {
431 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
432 bix.data_buf = kaddr + bv->bv_offset;
433 bix.data_size = bv->bv_len;
434 bix.prot_buf = prot_buf;
435 bix.sector = sector;
436
437 ret = bi->verify_fn(&bix);
438
439 if (ret) {
440 kunmap_atomic(kaddr, KM_USER0);
441 break;
442 }
443
444 sectors = bv->bv_len / bi->sector_size;
445 sector += sectors;
446 prot_buf += sectors * bi->tuple_size;
447 total += sectors * bi->tuple_size;
448 BUG_ON(total > bio->bi_integrity->bip_size);
449
450 kunmap_atomic(kaddr, KM_USER0);
451 }
452
453 return ret;
454}
455
456/**
457 * bio_integrity_verify_fn - Integrity I/O completion worker
458 * @work: Work struct stored in bio to be verified
459 *
460 * Description: This workqueue function is called to complete a READ
461 * request. The function verifies the transferred integrity metadata
462 * and then calls the original bio end_io function.
463 */
464static void bio_integrity_verify_fn(struct work_struct *work)
465{
466 struct bio_integrity_payload *bip =
467 container_of(work, struct bio_integrity_payload, bip_work);
468 struct bio *bio = bip->bip_bio;
469 int error = bip->bip_error;
470
471 if (bio_integrity_verify(bio)) {
472 clear_bit(BIO_UPTODATE, &bio->bi_flags);
473 error = -EIO;
474 }
475
476 /* Restore original bio completion handler */
477 bio->bi_end_io = bip->bip_end_io;
478
479 if (bio->bi_end_io)
480 bio->bi_end_io(bio, error);
481}
482
483/**
484 * bio_integrity_endio - Integrity I/O completion function
485 * @bio: Protected bio
486 * @error: Pointer to errno
487 *
488 * Description: Completion for integrity I/O
489 *
490 * Normally I/O completion is done in interrupt context. However,
491 * verifying I/O integrity is a time-consuming task which must be run
492 * in process context. This function postpones completion
493 * accordingly.
494 */
495void bio_integrity_endio(struct bio *bio, int error)
496{
497 struct bio_integrity_payload *bip = bio->bi_integrity;
498
499 BUG_ON(bip->bip_bio != bio);
500
501 bip->bip_error = error;
502 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
503 queue_work(kintegrityd_wq, &bip->bip_work);
504}
505EXPORT_SYMBOL(bio_integrity_endio);
506
507/**
508 * bio_integrity_mark_head - Advance bip_vec skip bytes
509 * @bip: Integrity vector to advance
510 * @skip: Number of bytes to advance it
511 */
512void bio_integrity_mark_head(struct bio_integrity_payload *bip,
513 unsigned int skip)
514{
515 struct bio_vec *iv;
516 unsigned int i;
517
518 bip_for_each_vec(iv, bip, i) {
519 if (skip == 0) {
520 bip->bip_idx = i;
521 return;
522 } else if (skip >= iv->bv_len) {
523 skip -= iv->bv_len;
524 } else { /* skip < iv->bv_len) */
525 iv->bv_offset += skip;
526 iv->bv_len -= skip;
527 bip->bip_idx = i;
528 return;
529 }
530 }
531}
532
533/**
534 * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
535 * @bip: Integrity vector to truncate
536 * @len: New length of integrity vector
537 */
538void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
539 unsigned int len)
540{
541 struct bio_vec *iv;
542 unsigned int i;
543
544 bip_for_each_vec(iv, bip, i) {
545 if (len == 0) {
546 bip->bip_vcnt = i;
547 return;
548 } else if (len >= iv->bv_len) {
549 len -= iv->bv_len;
550 } else { /* len < iv->bv_len) */
551 iv->bv_len = len;
552 len = 0;
553 }
554 }
555}
556
557/**
558 * bio_integrity_advance - Advance integrity vector
559 * @bio: bio whose integrity vector to update
560 * @bytes_done: number of data bytes that have been completed
561 *
562 * Description: This function calculates how many integrity bytes the
563 * number of completed data bytes correspond to and advances the
564 * integrity vector accordingly.
565 */
566void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
567{
568 struct bio_integrity_payload *bip = bio->bi_integrity;
569 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
570 unsigned int nr_sectors;
571
572 BUG_ON(bip == NULL);
573 BUG_ON(bi == NULL);
574
575 nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
576 bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
577}
578EXPORT_SYMBOL(bio_integrity_advance);
579
580/**
581 * bio_integrity_trim - Trim integrity vector
582 * @bio: bio whose integrity vector to update
583 * @offset: offset to first data sector
584 * @sectors: number of data sectors
585 *
586 * Description: Used to trim the integrity vector in a cloned bio.
587 * The ivec will be advanced corresponding to 'offset' data sectors
588 * and the length will be truncated corresponding to 'len' data
589 * sectors.
590 */
591void bio_integrity_trim(struct bio *bio, unsigned int offset,
592 unsigned int sectors)
593{
594 struct bio_integrity_payload *bip = bio->bi_integrity;
595 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
596 unsigned int nr_sectors;
597
598 BUG_ON(bip == NULL);
599 BUG_ON(bi == NULL);
600 BUG_ON(!bio_flagged(bio, BIO_CLONED));
601
602 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
603 bip->bip_sector = bip->bip_sector + offset;
604 bio_integrity_mark_head(bip, offset * bi->tuple_size);
605 bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
606}
607EXPORT_SYMBOL(bio_integrity_trim);
608
609/**
610 * bio_integrity_split - Split integrity metadata
611 * @bio: Protected bio
612 * @bp: Resulting bio_pair
613 * @sectors: Offset
614 *
615 * Description: Splits an integrity page into a bio_pair.
616 */
617void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
618{
619 struct blk_integrity *bi;
620 struct bio_integrity_payload *bip = bio->bi_integrity;
621 unsigned int nr_sectors;
622
623 if (bio_integrity(bio) == 0)
624 return;
625
626 bi = bdev_get_integrity(bio->bi_bdev);
627 BUG_ON(bi == NULL);
628 BUG_ON(bip->bip_vcnt != 1);
629
630 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
631
632 bp->bio1.bi_integrity = &bp->bip1;
633 bp->bio2.bi_integrity = &bp->bip2;
634
635 bp->iv1 = bip->bip_vec[0];
636 bp->iv2 = bip->bip_vec[0];
637
638 bp->bip1.bip_vec = &bp->iv1;
639 bp->bip2.bip_vec = &bp->iv2;
640
641 bp->iv1.bv_len = sectors * bi->tuple_size;
642 bp->iv2.bv_offset += sectors * bi->tuple_size;
643 bp->iv2.bv_len -= sectors * bi->tuple_size;
644
645 bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
646 bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
647
648 bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
649 bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
650}
651EXPORT_SYMBOL(bio_integrity_split);
652
653/**
654 * bio_integrity_clone - Callback for cloning bios with integrity metadata
655 * @bio: New bio
656 * @bio_src: Original bio
657 * @bs: bio_set to allocate bip from
658 *
659 * Description: Called to allocate a bip when cloning a bio
660 */
661int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
662 struct bio_set *bs)
663{
664 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
665 struct bio_integrity_payload *bip;
666
667 BUG_ON(bip_src == NULL);
668
669 bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
670
671 if (bip == NULL)
672 return -EIO;
673
674 memcpy(bip->bip_vec, bip_src->bip_vec,
675 bip_src->bip_vcnt * sizeof(struct bio_vec));
676
677 bip->bip_sector = bip_src->bip_sector;
678 bip->bip_vcnt = bip_src->bip_vcnt;
679 bip->bip_idx = bip_src->bip_idx;
680
681 return 0;
682}
683EXPORT_SYMBOL(bio_integrity_clone);
684
685int bioset_integrity_create(struct bio_set *bs, int pool_size)
686{
687 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
688 bio_integrity_slab);
689 if (!bs->bio_integrity_pool)
690 return -1;
691
692 return 0;
693}
694EXPORT_SYMBOL(bioset_integrity_create);
695
696void bioset_integrity_free(struct bio_set *bs)
697{
698 if (bs->bio_integrity_pool)
699 mempool_destroy(bs->bio_integrity_pool);
700}
701EXPORT_SYMBOL(bioset_integrity_free);
702
703void __init bio_integrity_init_slab(void)
704{
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709
710static int __init integrity_init(void)
711{
712 kintegrityd_wq = create_workqueue("kintegrityd");
713
714 if (!kintegrityd_wq)
715 panic("Failed to create kintegrityd\n");
716
717 return 0;
718}
719subsys_initcall(integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb52..88322b066acb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,25 +28,10 @@
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 29#include <scsi/sg.h> /* for struct sg_iovec */
30 30
31#define BIO_POOL_SIZE 2
32
33static struct kmem_cache *bio_slab __read_mostly; 31static struct kmem_cache *bio_slab __read_mostly;
34 32
35#define BIOVEC_NR_POOLS 6
36
37/*
38 * a small number of entries is fine, not going to be performance critical.
39 * basically we just need to survive
40 */
41#define BIO_SPLIT_ENTRIES 2
42mempool_t *bio_split_pool __read_mostly; 33mempool_t *bio_split_pool __read_mostly;
43 34
44struct biovec_slab {
45 int nr_vecs;
46 char *name;
47 struct kmem_cache *slab;
48};
49
50/* 35/*
51 * if you change this list, also change bvec_alloc or things will 36 * if you change this list, also change bvec_alloc or things will
52 * break badly! cannot be bigger than what you can fit into an 37 * break badly! cannot be bigger than what you can fit into an
@@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
60#undef BV 45#undef BV
61 46
62/* 47/*
63 * bio_set is used to allow other portions of the IO system to
64 * allocate their own private memory pools for bio and iovec structures.
65 * These memory pools in turn all allocate from the bio_slab
66 * and the bvec_slabs[].
67 */
68struct bio_set {
69 mempool_t *bio_pool;
70 mempool_t *bvec_pools[BIOVEC_NR_POOLS];
71};
72
73/*
74 * fs_bio_set is the bio_set containing bio and iovec memory pools used by 48 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
75 * IO code that does not need private memory pools. 49 * IO code that does not need private memory pools.
76 */ 50 */
77static struct bio_set *fs_bio_set; 51struct bio_set *fs_bio_set;
52
53unsigned int bvec_nr_vecs(unsigned short idx)
54{
55 return bvec_slabs[idx].nr_vecs;
56}
78 57
79static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) 58struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
80{ 59{
81 struct bio_vec *bvl; 60 struct bio_vec *bvl;
82 61
@@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
117 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); 96 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 } 97 }
119 98
99 if (bio_integrity(bio))
100 bio_integrity_free(bio, bio_set);
101
120 mempool_free(bio, bio_set->bio_pool); 102 mempool_free(bio, bio_set->bio_pool);
121} 103}
122 104
@@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
275{ 257{
276 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 258 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
277 259
278 if (b) { 260 if (!b)
279 b->bi_destructor = bio_fs_destructor; 261 return NULL;
280 __bio_clone(b, bio); 262
263 b->bi_destructor = bio_fs_destructor;
264 __bio_clone(b, bio);
265
266 if (bio_integrity(bio)) {
267 int ret;
268
269 ret = bio_integrity_clone(b, bio, fs_bio_set);
270
271 if (ret < 0)
272 return NULL;
281 } 273 }
282 274
283 return b; 275 return b;
@@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
333 if (page == prev->bv_page && 325 if (page == prev->bv_page &&
334 offset == prev->bv_offset + prev->bv_len) { 326 offset == prev->bv_offset + prev->bv_len) {
335 prev->bv_len += len; 327 prev->bv_len += len;
336 if (q->merge_bvec_fn && 328
337 q->merge_bvec_fn(q, bio, prev) < len) { 329 if (q->merge_bvec_fn) {
338 prev->bv_len -= len; 330 struct bvec_merge_data bvm = {
339 return 0; 331 .bi_bdev = bio->bi_bdev,
332 .bi_sector = bio->bi_sector,
333 .bi_size = bio->bi_size,
334 .bi_rw = bio->bi_rw,
335 };
336
337 if (q->merge_bvec_fn(q, &bvm, prev) < len) {
338 prev->bv_len -= len;
339 return 0;
340 }
340 } 341 }
341 342
342 goto done; 343 goto done;
@@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
377 * queue to get further control 378 * queue to get further control
378 */ 379 */
379 if (q->merge_bvec_fn) { 380 if (q->merge_bvec_fn) {
381 struct bvec_merge_data bvm = {
382 .bi_bdev = bio->bi_bdev,
383 .bi_sector = bio->bi_sector,
384 .bi_size = bio->bi_size,
385 .bi_rw = bio->bi_rw,
386 };
387
380 /* 388 /*
381 * merge_bvec_fn() returns number of bytes it can accept 389 * merge_bvec_fn() returns number of bytes it can accept
382 * at this offset 390 * at this offset
383 */ 391 */
384 if (q->merge_bvec_fn(q, bio, bvec) < len) { 392 if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
385 bvec->bv_page = NULL; 393 bvec->bv_page = NULL;
386 bvec->bv_len = 0; 394 bvec->bv_len = 0;
387 bvec->bv_offset = 0; 395 bvec->bv_offset = 0;
@@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1249 bp->bio1.bi_private = bi; 1257 bp->bio1.bi_private = bi;
1250 bp->bio2.bi_private = pool; 1258 bp->bio2.bi_private = pool;
1251 1259
1260 if (bio_integrity(bi))
1261 bio_integrity_split(bi, bp, first_sectors);
1262
1252 return bp; 1263 return bp;
1253} 1264}
1254 1265
@@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs)
1290 if (bs->bio_pool) 1301 if (bs->bio_pool)
1291 mempool_destroy(bs->bio_pool); 1302 mempool_destroy(bs->bio_pool);
1292 1303
1304 bioset_integrity_free(bs);
1293 biovec_free_pools(bs); 1305 biovec_free_pools(bs);
1294 1306
1295 kfree(bs); 1307 kfree(bs);
@@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
1306 if (!bs->bio_pool) 1318 if (!bs->bio_pool)
1307 goto bad; 1319 goto bad;
1308 1320
1321 if (bioset_integrity_create(bs, bio_pool_size))
1322 goto bad;
1323
1309 if (!biovec_create_pools(bs, bvec_pool_size)) 1324 if (!biovec_create_pools(bs, bvec_pool_size))
1310 return bs; 1325 return bs;
1311 1326
@@ -1332,6 +1347,7 @@ static int __init init_bio(void)
1332{ 1347{
1333 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 1348 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1334 1349
1350 bio_integrity_init_slab();
1335 biovec_init_slabs(); 1351 biovec_init_slabs();
1336 1352
1337 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); 1353 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 68e510b88457..3cb7cda3d780 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -373,6 +373,8 @@ static int chrdev_open(struct inode *inode, struct file *filp)
373 return -ENXIO; 373 return -ENXIO;
374 new = container_of(kobj, struct cdev, kobj); 374 new = container_of(kobj, struct cdev, kobj);
375 spin_lock(&cdev_lock); 375 spin_lock(&cdev_lock);
376 /* Check i_cdev again in case somebody beat us to it while
377 we dropped the lock. */
376 p = inode->i_cdev; 378 p = inode->i_cdev;
377 if (!p) { 379 if (!p) {
378 inode->i_cdev = p = new; 380 inode->i_cdev = p = new;
@@ -392,11 +394,8 @@ static int chrdev_open(struct inode *inode, struct file *filp)
392 cdev_put(p); 394 cdev_put(p);
393 return -ENXIO; 395 return -ENXIO;
394 } 396 }
395 if (filp->f_op->open) { 397 if (filp->f_op->open)
396 lock_kernel();
397 ret = filp->f_op->open(inode,filp); 398 ret = filp->f_op->open(inode,filp);
398 unlock_kernel();
399 }
400 if (ret) 399 if (ret)
401 cdev_put(p); 400 cdev_put(p);
402 return ret; 401 return ret;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 86b4d5f405ae..22857c639df5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -612,7 +612,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
612 if (retval < 0) 612 if (retval < 0)
613 return (loff_t)retval; 613 return (loff_t)retval;
614 } 614 }
615 return remote_llseek(file, offset, origin); 615 return generic_file_llseek_unlocked(file, offset, origin);
616} 616}
617 617
618struct file_system_type cifs_fs_type = { 618struct file_system_type cifs_fs_type = {
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index ebbcf38fd33b..f976f303c196 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -15,6 +15,7 @@
15#include <linux/poll.h> 15#include <linux/poll.h>
16#include <linux/signal.h> 16#include <linux/signal.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/smp_lock.h>
18#include <linux/dlm.h> 19#include <linux/dlm.h>
19#include <linux/dlm_device.h> 20#include <linux/dlm_device.h>
20 21
@@ -618,13 +619,17 @@ static int device_open(struct inode *inode, struct file *file)
618 struct dlm_user_proc *proc; 619 struct dlm_user_proc *proc;
619 struct dlm_ls *ls; 620 struct dlm_ls *ls;
620 621
622 lock_kernel();
621 ls = dlm_find_lockspace_device(iminor(inode)); 623 ls = dlm_find_lockspace_device(iminor(inode));
622 if (!ls) 624 if (!ls) {
625 unlock_kernel();
623 return -ENOENT; 626 return -ENOENT;
627 }
624 628
625 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); 629 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
626 if (!proc) { 630 if (!proc) {
627 dlm_put_lockspace(ls); 631 dlm_put_lockspace(ls);
632 unlock_kernel();
628 return -ENOMEM; 633 return -ENOMEM;
629 } 634 }
630 635
@@ -636,6 +641,7 @@ static int device_open(struct inode *inode, struct file *file)
636 spin_lock_init(&proc->locks_spin); 641 spin_lock_init(&proc->locks_spin);
637 init_waitqueue_head(&proc->wait); 642 init_waitqueue_head(&proc->wait);
638 file->private_data = proc; 643 file->private_data = proc;
644 unlock_kernel();
639 645
640 return 0; 646 return 0;
641} 647}
@@ -870,6 +876,7 @@ static unsigned int device_poll(struct file *file, poll_table *wait)
870 876
871static int ctl_device_open(struct inode *inode, struct file *file) 877static int ctl_device_open(struct inode *inode, struct file *file)
872{ 878{
879 cycle_kernel_lock();
873 file->private_data = NULL; 880 file->private_data = NULL;
874 return 0; 881 return 0;
875} 882}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2258b8f654a6..24749bf0668f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -30,6 +30,7 @@
30#include <linux/security.h> 30#include <linux/security.h>
31#include <linux/compat.h> 31#include <linux/compat.h>
32#include <linux/fs_stack.h> 32#include <linux/fs_stack.h>
33#include <linux/smp_lock.h>
33#include "ecryptfs_kernel.h" 34#include "ecryptfs_kernel.h"
34 35
35/** 36/**
@@ -277,9 +278,11 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
277 int rc = 0; 278 int rc = 0;
278 struct file *lower_file = NULL; 279 struct file *lower_file = NULL;
279 280
281 lock_kernel();
280 lower_file = ecryptfs_file_to_lower(file); 282 lower_file = ecryptfs_file_to_lower(file);
281 if (lower_file->f_op && lower_file->f_op->fasync) 283 if (lower_file->f_op && lower_file->f_op->fasync)
282 rc = lower_file->f_op->fasync(fd, lower_file, flag); 284 rc = lower_file->f_op->fasync(fd, lower_file, flag);
285 unlock_kernel();
283 return rc; 286 return rc;
284} 287}
285 288
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index fda25479af26..3a9ecac8d61f 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -61,7 +61,7 @@ void fat_cache_destroy(void)
61 61
62static inline struct fat_cache *fat_cache_alloc(struct inode *inode) 62static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
63{ 63{
64 return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); 64 return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS);
65} 65}
66 66
67static inline void fat_cache_free(struct fat_cache *cache) 67static inline void fat_cache_free(struct fat_cache *cache)
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 486725ee99ae..34541d06e626 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -472,7 +472,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
472 loff_t cpos; 472 loff_t cpos;
473 int ret = 0; 473 int ret = 0;
474 474
475 lock_kernel(); 475 lock_super(sb);
476 476
477 cpos = filp->f_pos; 477 cpos = filp->f_pos;
478 /* Fake . and .. for the root directory. */ 478 /* Fake . and .. for the root directory. */
@@ -654,7 +654,7 @@ FillFailed:
654 if (unicode) 654 if (unicode)
655 __putname(unicode); 655 __putname(unicode);
656out: 656out:
657 unlock_kernel(); 657 unlock_super(sb);
658 return ret; 658 return ret;
659} 659}
660 660
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 771326b8047e..c672df4036e9 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -11,7 +11,6 @@
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/msdos_fs.h> 13#include <linux/msdos_fs.h>
14#include <linux/smp_lock.h>
15#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
16#include <linux/writeback.h> 15#include <linux/writeback.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
@@ -242,9 +241,7 @@ void fat_truncate(struct inode *inode)
242 241
243 nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; 242 nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits;
244 243
245 lock_kernel();
246 fat_free(inode, nr_clusters); 244 fat_free(inode, nr_clusters);
247 unlock_kernel();
248 fat_flush_inodes(inode->i_sb, inode, NULL); 245 fat_flush_inodes(inode->i_sb, inode, NULL);
249} 246}
250 247
@@ -310,8 +307,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
310 int error = 0; 307 int error = 0;
311 unsigned int ia_valid; 308 unsigned int ia_valid;
312 309
313 lock_kernel();
314
315 /* 310 /*
316 * Expand the file. Since inode_setattr() updates ->i_size 311 * Expand the file. Since inode_setattr() updates ->i_size
317 * before calling the ->truncate(), but FAT needs to fill the 312 * before calling the ->truncate(), but FAT needs to fill the
@@ -366,7 +361,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
366 361
367 error = inode_setattr(inode, attr); 362 error = inode_setattr(inode, attr);
368out: 363out:
369 unlock_kernel();
370 return error; 364 return error;
371} 365}
372EXPORT_SYMBOL_GPL(fat_setattr); 366EXPORT_SYMBOL_GPL(fat_setattr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4e0a3dd9d677..46a4508ffd2e 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -440,14 +440,13 @@ static void fat_delete_inode(struct inode *inode)
440 440
441static void fat_clear_inode(struct inode *inode) 441static void fat_clear_inode(struct inode *inode)
442{ 442{
443 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 443 struct super_block *sb = inode->i_sb;
444 struct msdos_sb_info *sbi = MSDOS_SB(sb);
444 445
445 lock_kernel();
446 spin_lock(&sbi->inode_hash_lock); 446 spin_lock(&sbi->inode_hash_lock);
447 fat_cache_inval_inode(inode); 447 fat_cache_inval_inode(inode);
448 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 448 hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
449 spin_unlock(&sbi->inode_hash_lock); 449 spin_unlock(&sbi->inode_hash_lock);
450 unlock_kernel();
451} 450}
452 451
453static void fat_write_super(struct super_block *sb) 452static void fat_write_super(struct super_block *sb)
@@ -485,7 +484,7 @@ static struct kmem_cache *fat_inode_cachep;
485static struct inode *fat_alloc_inode(struct super_block *sb) 484static struct inode *fat_alloc_inode(struct super_block *sb)
486{ 485{
487 struct msdos_inode_info *ei; 486 struct msdos_inode_info *ei;
488 ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); 487 ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
489 if (!ei) 488 if (!ei)
490 return NULL; 489 return NULL;
491 return &ei->vfs_inode; 490 return &ei->vfs_inode;
@@ -567,7 +566,7 @@ retry:
567 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 566 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
568 return 0; 567 return 0;
569 568
570 lock_kernel(); 569 lock_super(sb);
571 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 570 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
572 if (!bh) { 571 if (!bh) {
573 printk(KERN_ERR "FAT: unable to read inode block " 572 printk(KERN_ERR "FAT: unable to read inode block "
@@ -579,7 +578,7 @@ retry:
579 if (i_pos != MSDOS_I(inode)->i_pos) { 578 if (i_pos != MSDOS_I(inode)->i_pos) {
580 spin_unlock(&sbi->inode_hash_lock); 579 spin_unlock(&sbi->inode_hash_lock);
581 brelse(bh); 580 brelse(bh);
582 unlock_kernel(); 581 unlock_super(sb);
583 goto retry; 582 goto retry;
584 } 583 }
585 584
@@ -606,7 +605,7 @@ retry:
606 err = sync_dirty_buffer(bh); 605 err = sync_dirty_buffer(bh);
607 brelse(bh); 606 brelse(bh);
608out: 607out:
609 unlock_kernel(); 608 unlock_super(sb);
610 return err; 609 return err;
611} 610}
612 611
@@ -736,6 +735,7 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
736 735
737static struct dentry *fat_get_parent(struct dentry *child) 736static struct dentry *fat_get_parent(struct dentry *child)
738{ 737{
738 struct super_block *sb = child->d_sb;
739 struct buffer_head *bh; 739 struct buffer_head *bh;
740 struct msdos_dir_entry *de; 740 struct msdos_dir_entry *de;
741 loff_t i_pos; 741 loff_t i_pos;
@@ -743,14 +743,14 @@ static struct dentry *fat_get_parent(struct dentry *child)
743 struct inode *inode; 743 struct inode *inode;
744 int err; 744 int err;
745 745
746 lock_kernel(); 746 lock_super(sb);
747 747
748 err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); 748 err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos);
749 if (err) { 749 if (err) {
750 parent = ERR_PTR(err); 750 parent = ERR_PTR(err);
751 goto out; 751 goto out;
752 } 752 }
753 inode = fat_build_inode(child->d_sb, de, i_pos); 753 inode = fat_build_inode(sb, de, i_pos);
754 brelse(bh); 754 brelse(bh);
755 if (IS_ERR(inode)) { 755 if (IS_ERR(inode)) {
756 parent = ERR_CAST(inode); 756 parent = ERR_CAST(inode);
@@ -762,7 +762,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
762 parent = ERR_PTR(-ENOMEM); 762 parent = ERR_PTR(-ENOMEM);
763 } 763 }
764out: 764out:
765 unlock_kernel(); 765 unlock_super(sb);
766 766
767 return parent; 767 return parent;
768} 768}
@@ -1172,6 +1172,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1172 long error; 1172 long error;
1173 char buf[50]; 1173 char buf[50];
1174 1174
1175 /*
1176 * GFP_KERNEL is ok here, because while we do hold the
1177 * supeblock lock, memory pressure can't call back into
1178 * the filesystem, since we're only just about to mount
1179 * it and have no inodes etc active!
1180 */
1175 sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); 1181 sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
1176 if (!sbi) 1182 if (!sbi)
1177 return -ENOMEM; 1183 return -ENOMEM;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bfd776509a72..330a7d782591 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -12,7 +12,6 @@
12#include <linux/fdtable.h> 12#include <linux/fdtable.h>
13#include <linux/capability.h> 13#include <linux/capability.h>
14#include <linux/dnotify.h> 14#include <linux/dnotify.h>
15#include <linux/smp_lock.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/security.h> 17#include <linux/security.h>
@@ -227,7 +226,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
227 if (error) 226 if (error)
228 return error; 227 return error;
229 228
230 lock_kernel();
231 if ((arg ^ filp->f_flags) & FASYNC) { 229 if ((arg ^ filp->f_flags) & FASYNC) {
232 if (filp->f_op && filp->f_op->fasync) { 230 if (filp->f_op && filp->f_op->fasync) {
233 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 231 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
@@ -238,7 +236,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
238 236
239 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 237 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
240 out: 238 out:
241 unlock_kernel();
242 return error; 239 return error;
243} 240}
244 241
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index e1b7d525a066..24dd59450088 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -62,11 +62,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
62 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 62 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
63 &i_gh); 63 &i_gh);
64 if (!error) { 64 if (!error) {
65 error = remote_llseek(file, offset, origin); 65 error = generic_file_llseek_unlocked(file, offset, origin);
66 gfs2_glock_dq_uninit(&i_gh); 66 gfs2_glock_dq_uninit(&i_gh);
67 } 67 }
68 } else 68 } else
69 error = remote_llseek(file, offset, origin); 69 error = generic_file_llseek_unlocked(file, offset, origin);
70 70
71 return error; 71 return error;
72} 72}
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 05ff4f1d7026..1f7f2956412a 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -214,7 +214,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
214 214
215 dentry->d_op = &msdos_dentry_operations; 215 dentry->d_op = &msdos_dentry_operations;
216 216
217 lock_kernel(); 217 lock_super(sb);
218 res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 218 res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
219 if (res == -ENOENT) 219 if (res == -ENOENT)
220 goto add; 220 goto add;
@@ -232,7 +232,7 @@ add:
232 if (dentry) 232 if (dentry)
233 dentry->d_op = &msdos_dentry_operations; 233 dentry->d_op = &msdos_dentry_operations;
234out: 234out:
235 unlock_kernel(); 235 unlock_super(sb);
236 if (!res) 236 if (!res)
237 return dentry; 237 return dentry;
238 return ERR_PTR(res); 238 return ERR_PTR(res);
@@ -286,7 +286,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
286 unsigned char msdos_name[MSDOS_NAME]; 286 unsigned char msdos_name[MSDOS_NAME];
287 int err, is_hid; 287 int err, is_hid;
288 288
289 lock_kernel(); 289 lock_super(sb);
290 290
291 err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, 291 err = msdos_format_name(dentry->d_name.name, dentry->d_name.len,
292 msdos_name, &MSDOS_SB(sb)->options); 292 msdos_name, &MSDOS_SB(sb)->options);
@@ -315,7 +315,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
315 315
316 d_instantiate(dentry, inode); 316 d_instantiate(dentry, inode);
317out: 317out:
318 unlock_kernel(); 318 unlock_super(sb);
319 if (!err) 319 if (!err)
320 err = fat_flush_inodes(sb, dir, inode); 320 err = fat_flush_inodes(sb, dir, inode);
321 return err; 321 return err;
@@ -324,11 +324,12 @@ out:
324/***** Remove a directory */ 324/***** Remove a directory */
325static int msdos_rmdir(struct inode *dir, struct dentry *dentry) 325static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
326{ 326{
327 struct super_block *sb = dir->i_sb;
327 struct inode *inode = dentry->d_inode; 328 struct inode *inode = dentry->d_inode;
328 struct fat_slot_info sinfo; 329 struct fat_slot_info sinfo;
329 int err; 330 int err;
330 331
331 lock_kernel(); 332 lock_super(sb);
332 /* 333 /*
333 * Check whether the directory is not in use, then check 334 * Check whether the directory is not in use, then check
334 * whether it is empty. 335 * whether it is empty.
@@ -349,9 +350,9 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
349 inode->i_ctime = CURRENT_TIME_SEC; 350 inode->i_ctime = CURRENT_TIME_SEC;
350 fat_detach(inode); 351 fat_detach(inode);
351out: 352out:
352 unlock_kernel(); 353 unlock_super(sb);
353 if (!err) 354 if (!err)
354 err = fat_flush_inodes(inode->i_sb, dir, inode); 355 err = fat_flush_inodes(sb, dir, inode);
355 356
356 return err; 357 return err;
357} 358}
@@ -366,7 +367,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
366 struct timespec ts; 367 struct timespec ts;
367 int err, is_hid, cluster; 368 int err, is_hid, cluster;
368 369
369 lock_kernel(); 370 lock_super(sb);
370 371
371 err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, 372 err = msdos_format_name(dentry->d_name.name, dentry->d_name.len,
372 msdos_name, &MSDOS_SB(sb)->options); 373 msdos_name, &MSDOS_SB(sb)->options);
@@ -404,14 +405,14 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
404 405
405 d_instantiate(dentry, inode); 406 d_instantiate(dentry, inode);
406 407
407 unlock_kernel(); 408 unlock_super(sb);
408 fat_flush_inodes(sb, dir, inode); 409 fat_flush_inodes(sb, dir, inode);
409 return 0; 410 return 0;
410 411
411out_free: 412out_free:
412 fat_free_clusters(dir, cluster); 413 fat_free_clusters(dir, cluster);
413out: 414out:
414 unlock_kernel(); 415 unlock_super(sb);
415 return err; 416 return err;
416} 417}
417 418
@@ -419,10 +420,11 @@ out:
419static int msdos_unlink(struct inode *dir, struct dentry *dentry) 420static int msdos_unlink(struct inode *dir, struct dentry *dentry)
420{ 421{
421 struct inode *inode = dentry->d_inode; 422 struct inode *inode = dentry->d_inode;
423 struct super_block *sb= inode->i_sb;
422 struct fat_slot_info sinfo; 424 struct fat_slot_info sinfo;
423 int err; 425 int err;
424 426
425 lock_kernel(); 427 lock_super(sb);
426 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 428 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
427 if (err) 429 if (err)
428 goto out; 430 goto out;
@@ -434,9 +436,9 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry)
434 inode->i_ctime = CURRENT_TIME_SEC; 436 inode->i_ctime = CURRENT_TIME_SEC;
435 fat_detach(inode); 437 fat_detach(inode);
436out: 438out:
437 unlock_kernel(); 439 unlock_super(sb);
438 if (!err) 440 if (!err)
439 err = fat_flush_inodes(inode->i_sb, dir, inode); 441 err = fat_flush_inodes(sb, dir, inode);
440 442
441 return err; 443 return err;
442} 444}
@@ -618,10 +620,11 @@ error_inode:
618static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, 620static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
619 struct inode *new_dir, struct dentry *new_dentry) 621 struct inode *new_dir, struct dentry *new_dentry)
620{ 622{
623 struct super_block *sb = old_dir->i_sb;
621 unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; 624 unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME];
622 int err, is_hid; 625 int err, is_hid;
623 626
624 lock_kernel(); 627 lock_super(sb);
625 628
626 err = msdos_format_name(old_dentry->d_name.name, 629 err = msdos_format_name(old_dentry->d_name.name,
627 old_dentry->d_name.len, old_msdos_name, 630 old_dentry->d_name.len, old_msdos_name,
@@ -640,9 +643,9 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
640 err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, 643 err = do_msdos_rename(old_dir, old_msdos_name, old_dentry,
641 new_dir, new_msdos_name, new_dentry, is_hid); 644 new_dir, new_msdos_name, new_dentry, is_hid);
642out: 645out:
643 unlock_kernel(); 646 unlock_super(sb);
644 if (!err) 647 if (!err)
645 err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); 648 err = fat_flush_inodes(sb, old_dir, new_dir);
646 return err; 649 return err;
647} 650}
648 651
diff --git a/fs/namespace.c b/fs/namespace.c
index 4fc302c2a0e0..4f6f7635b59c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -750,7 +750,7 @@ struct proc_fs_info {
750 const char *str; 750 const char *str;
751}; 751};
752 752
753static void show_sb_opts(struct seq_file *m, struct super_block *sb) 753static int show_sb_opts(struct seq_file *m, struct super_block *sb)
754{ 754{
755 static const struct proc_fs_info fs_info[] = { 755 static const struct proc_fs_info fs_info[] = {
756 { MS_SYNCHRONOUS, ",sync" }, 756 { MS_SYNCHRONOUS, ",sync" },
@@ -764,6 +764,8 @@ static void show_sb_opts(struct seq_file *m, struct super_block *sb)
764 if (sb->s_flags & fs_infop->flag) 764 if (sb->s_flags & fs_infop->flag)
765 seq_puts(m, fs_infop->str); 765 seq_puts(m, fs_infop->str);
766 } 766 }
767
768 return security_sb_show_options(m, sb);
767} 769}
768 770
769static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) 771static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
@@ -806,11 +808,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
806 seq_putc(m, ' '); 808 seq_putc(m, ' ');
807 show_type(m, mnt->mnt_sb); 809 show_type(m, mnt->mnt_sb);
808 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); 810 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
809 show_sb_opts(m, mnt->mnt_sb); 811 err = show_sb_opts(m, mnt->mnt_sb);
812 if (err)
813 goto out;
810 show_mnt_opts(m, mnt); 814 show_mnt_opts(m, mnt);
811 if (mnt->mnt_sb->s_op->show_options) 815 if (mnt->mnt_sb->s_op->show_options)
812 err = mnt->mnt_sb->s_op->show_options(m, mnt); 816 err = mnt->mnt_sb->s_op->show_options(m, mnt);
813 seq_puts(m, " 0 0\n"); 817 seq_puts(m, " 0 0\n");
818out:
814 return err; 819 return err;
815} 820}
816 821
@@ -865,10 +870,13 @@ static int show_mountinfo(struct seq_file *m, void *v)
865 seq_putc(m, ' '); 870 seq_putc(m, ' ');
866 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 871 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
867 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); 872 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
868 show_sb_opts(m, sb); 873 err = show_sb_opts(m, sb);
874 if (err)
875 goto out;
869 if (sb->s_op->show_options) 876 if (sb->s_op->show_options)
870 err = sb->s_op->show_options(m, mnt); 877 err = sb->s_op->show_options(m, mnt);
871 seq_putc(m, '\n'); 878 seq_putc(m, '\n');
879out:
872 return err; 880 return err;
873} 881}
874 882
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 2b145de45b39..6a7d901f1936 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp_lock.h>
21 22
22#include <linux/ncp_fs.h> 23#include <linux/ncp_fs.h>
23#include "ncplib_kernel.h" 24#include "ncplib_kernel.h"
@@ -281,9 +282,18 @@ static int ncp_release(struct inode *inode, struct file *file) {
281 return 0; 282 return 0;
282} 283}
283 284
285static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin)
286{
287 loff_t ret;
288 lock_kernel();
289 ret = generic_file_llseek_unlocked(file, offset, origin);
290 unlock_kernel();
291 return ret;
292}
293
284const struct file_operations ncp_file_operations = 294const struct file_operations ncp_file_operations =
285{ 295{
286 .llseek = remote_llseek, 296 .llseek = ncp_remote_llseek,
287 .read = ncp_file_read, 297 .read = ncp_file_read,
288 .write = ncp_file_write, 298 .write = ncp_file_write,
289 .ioctl = ncp_ioctl, 299 .ioctl = ncp_ioctl,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d84a3d8f32af..4e98a56a1777 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -170,6 +170,7 @@ force_reval:
170 170
171static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) 171static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
172{ 172{
173 loff_t loff;
173 /* origin == SEEK_END => we must revalidate the cached file length */ 174 /* origin == SEEK_END => we must revalidate the cached file length */
174 if (origin == SEEK_END) { 175 if (origin == SEEK_END) {
175 struct inode *inode = filp->f_mapping->host; 176 struct inode *inode = filp->f_mapping->host;
@@ -177,7 +178,10 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
177 if (retval < 0) 178 if (retval < 0)
178 return (loff_t)retval; 179 return (loff_t)retval;
179 } 180 }
180 return remote_llseek(filp, offset, origin); 181 lock_kernel(); /* BKL needed? */
182 loff = generic_file_llseek_unlocked(filp, offset, origin);
183 unlock_kernel();
184 return loff;
181} 185}
182 186
183/* 187/*
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index c021280dd462..bd7e0f3acfc7 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -21,6 +21,7 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/miscdevice.h> 22#include <linux/miscdevice.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/smp_lock.h>
24#include <linux/reboot.h> 25#include <linux/reboot.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26 27
@@ -619,10 +620,12 @@ static int ocfs2_control_open(struct inode *inode, struct file *file)
619 return -ENOMEM; 620 return -ENOMEM;
620 p->op_this_node = -1; 621 p->op_this_node = -1;
621 622
623 lock_kernel();
622 mutex_lock(&ocfs2_control_lock); 624 mutex_lock(&ocfs2_control_lock);
623 file->private_data = p; 625 file->private_data = p;
624 list_add(&p->op_list, &ocfs2_control_private_list); 626 list_add(&p->op_list, &ocfs2_control_private_list);
625 mutex_unlock(&ocfs2_control_lock); 627 mutex_unlock(&ocfs2_control_lock);
628 unlock_kernel();
626 629
627 return 0; 630 return 0;
628} 631}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3b455371e7ff..58c3e6a8e15e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -233,7 +233,7 @@ static int check_mem_permission(struct task_struct *task)
233 */ 233 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 234 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
235 task_is_stopped_or_traced(task) && 235 task_is_stopped_or_traced(task) &&
236 ptrace_may_attach(task)) 236 ptrace_may_access(task, PTRACE_MODE_ATTACH))
237 return 0; 237 return 0;
238 238
239 /* 239 /*
@@ -251,7 +251,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
251 task_lock(task); 251 task_lock(task);
252 if (task->mm != mm) 252 if (task->mm != mm)
253 goto out; 253 goto out;
254 if (task->mm != current->mm && __ptrace_may_attach(task) < 0) 254 if (task->mm != current->mm &&
255 __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
255 goto out; 256 goto out;
256 task_unlock(task); 257 task_unlock(task);
257 return mm; 258 return mm;
@@ -518,7 +519,7 @@ static int proc_fd_access_allowed(struct inode *inode)
518 */ 519 */
519 task = get_proc_task(inode); 520 task = get_proc_task(inode);
520 if (task) { 521 if (task) {
521 allowed = ptrace_may_attach(task); 522 allowed = ptrace_may_access(task, PTRACE_MODE_READ);
522 put_task_struct(task); 523 put_task_struct(task);
523 } 524 }
524 return allowed; 525 return allowed;
@@ -904,7 +905,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
904 if (!task) 905 if (!task)
905 goto out_no_task; 906 goto out_no_task;
906 907
907 if (!ptrace_may_attach(task)) 908 if (!ptrace_may_access(task, PTRACE_MODE_READ))
908 goto out; 909 goto out;
909 910
910 ret = -ENOMEM; 911 ret = -ENOMEM;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7e277f2ad466..c652d469dc08 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off,
123 return proc_calc_metrics(page, start, off, count, eof, len); 123 return proc_calc_metrics(page, start, off, count, eof, len);
124} 124}
125 125
126int __attribute__((weak)) arch_report_meminfo(char *page)
127{
128 return 0;
129}
130
126static int meminfo_read_proc(char *page, char **start, off_t off, 131static int meminfo_read_proc(char *page, char **start, off_t off,
127 int count, int *eof, void *data) 132 int count, int *eof, void *data)
128{ 133{
@@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
221 226
222 len += hugetlb_report_meminfo(page + len); 227 len += hugetlb_report_meminfo(page + len);
223 228
229 len += arch_report_meminfo(page + len);
230
224 return proc_calc_metrics(page, start, off, count, eof, len); 231 return proc_calc_metrics(page, start, off, count, eof, len);
225#undef K 232#undef K
226} 233}
@@ -472,6 +479,13 @@ static const struct file_operations proc_vmalloc_operations = {
472}; 479};
473#endif 480#endif
474 481
482#ifndef arch_irq_stat_cpu
483#define arch_irq_stat_cpu(cpu) 0
484#endif
485#ifndef arch_irq_stat
486#define arch_irq_stat() 0
487#endif
488
475static int show_stat(struct seq_file *p, void *v) 489static int show_stat(struct seq_file *p, void *v)
476{ 490{
477 int i; 491 int i;
@@ -509,7 +523,9 @@ static int show_stat(struct seq_file *p, void *v)
509 sum += temp; 523 sum += temp;
510 per_irq_sum[j] += temp; 524 per_irq_sum[j] += temp;
511 } 525 }
526 sum += arch_irq_stat_cpu(i);
512 } 527 }
528 sum += arch_irq_stat();
513 529
514 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 530 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
515 (unsigned long long)cputime64_to_clock_t(user), 531 (unsigned long long)cputime64_to_clock_t(user),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c492449f3b45..164bd9f9ede3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -210,7 +210,7 @@ static int show_map(struct seq_file *m, void *v)
210 dev_t dev = 0; 210 dev_t dev = 0;
211 int len; 211 int len;
212 212
213 if (maps_protect && !ptrace_may_attach(task)) 213 if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
214 return -EACCES; 214 return -EACCES;
215 215
216 if (file) { 216 if (file) {
@@ -646,7 +646,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
646 goto out; 646 goto out;
647 647
648 ret = -EACCES; 648 ret = -EACCES;
649 if (!ptrace_may_attach(task)) 649 if (!ptrace_may_access(task, PTRACE_MODE_READ))
650 goto out_task; 650 goto out_task;
651 651
652 ret = -EINVAL; 652 ret = -EINVAL;
@@ -747,7 +747,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v)
747 struct proc_maps_private *priv = m->private; 747 struct proc_maps_private *priv = m->private;
748 struct task_struct *task = priv->task; 748 struct task_struct *task = priv->task;
749 749
750 if (maps_protect && !ptrace_may_attach(task)) 750 if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
751 return -EACCES; 751 return -EACCES;
752 752
753 return show_numa_map(m, v); 753 return show_numa_map(m, v);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4b4f9cc2f186..5d84e7121df8 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -113,7 +113,7 @@ static int show_map(struct seq_file *m, void *_vml)
113 struct proc_maps_private *priv = m->private; 113 struct proc_maps_private *priv = m->private;
114 struct task_struct *task = priv->task; 114 struct task_struct *task = priv->task;
115 115
116 if (maps_protect && !ptrace_may_attach(task)) 116 if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
117 return -EACCES; 117 return -EACCES;
118 118
119 return nommu_vma_show(m, vml->vma); 119 return nommu_vma_show(m, vml->vma);
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 9590b9024300..78f613cb9c76 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = {
45 .mmap = generic_file_mmap, 45 .mmap = generic_file_mmap,
46 .fsync = simple_sync_file, 46 .fsync = simple_sync_file,
47 .splice_read = generic_file_splice_read, 47 .splice_read = generic_file_splice_read,
48 .splice_write = generic_file_splice_write,
48 .llseek = generic_file_llseek, 49 .llseek = generic_file_llseek,
49}; 50};
50 51
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0989bc2c2f69..52312ec93ff4 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = {
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .splice_read = generic_file_splice_read, 45 .splice_read = generic_file_splice_read,
46 .splice_write = generic_file_splice_write,
46 .llseek = generic_file_llseek, 47 .llseek = generic_file_llseek,
47}; 48};
48 49
diff --git a/fs/read_write.c b/fs/read_write.c
index f0d1240a5c69..9ba495d5a29b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,12 +31,12 @@ const struct file_operations generic_ro_fops = {
31 31
32EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
33 33
34loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 34loff_t
35generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
35{ 36{
36 loff_t retval; 37 loff_t retval;
37 struct inode *inode = file->f_mapping->host; 38 struct inode *inode = file->f_mapping->host;
38 39
39 mutex_lock(&inode->i_mutex);
40 switch (origin) { 40 switch (origin) {
41 case SEEK_END: 41 case SEEK_END:
42 offset += inode->i_size; 42 offset += inode->i_size;
@@ -46,42 +46,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
46 } 46 }
47 retval = -EINVAL; 47 retval = -EINVAL;
48 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 48 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
49 /* Special lock needed here? */
49 if (offset != file->f_pos) { 50 if (offset != file->f_pos) {
50 file->f_pos = offset; 51 file->f_pos = offset;
51 file->f_version = 0; 52 file->f_version = 0;
52 } 53 }
53 retval = offset; 54 retval = offset;
54 } 55 }
55 mutex_unlock(&inode->i_mutex);
56 return retval; 56 return retval;
57} 57}
58EXPORT_SYMBOL(generic_file_llseek_unlocked);
58 59
59EXPORT_SYMBOL(generic_file_llseek); 60loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
60
61loff_t remote_llseek(struct file *file, loff_t offset, int origin)
62{ 61{
63 loff_t retval; 62 loff_t n;
64 63 mutex_lock(&file->f_dentry->d_inode->i_mutex);
65 lock_kernel(); 64 n = generic_file_llseek_unlocked(file, offset, origin);
66 switch (origin) { 65 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
67 case SEEK_END: 66 return n;
68 offset += i_size_read(file->f_path.dentry->d_inode);
69 break;
70 case SEEK_CUR:
71 offset += file->f_pos;
72 }
73 retval = -EINVAL;
74 if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) {
75 if (offset != file->f_pos) {
76 file->f_pos = offset;
77 file->f_version = 0;
78 }
79 retval = offset;
80 }
81 unlock_kernel();
82 return retval;
83} 67}
84EXPORT_SYMBOL(remote_llseek); 68EXPORT_SYMBOL(generic_file_llseek);
85 69
86loff_t no_llseek(struct file *file, loff_t offset, int origin) 70loff_t no_llseek(struct file *file, loff_t offset, int origin)
87{ 71{
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index efbe29af3d7a..2294783320cb 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
422 return error; 422 return error;
423} 423}
424 424
425static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
426{
427 loff_t ret;
428 lock_kernel();
429 ret = generic_file_llseek_unlocked(file, offset, origin);
430 unlock_kernel();
431 return ret;
432}
433
425const struct file_operations smb_file_operations = 434const struct file_operations smb_file_operations =
426{ 435{
427 .llseek = remote_llseek, 436 .llseek = smb_remote_llseek,
428 .read = do_sync_read, 437 .read = do_sync_read,
429 .aio_read = smb_file_aio_read, 438 .aio_read = smb_file_aio_read,
430 .write = do_sync_write, 439 .write = do_sync_write,
diff --git a/fs/splice.c b/fs/splice.c
index aa5f6f60b305..399442179d89 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
379 lock_page(page); 379 lock_page(page);
380 380
381 /* 381 /*
382 * page was truncated, stop here. if this isn't the 382 * Page was truncated, or invalidated by the
383 * first page, we'll just complete what we already 383 * filesystem. Redo the find/create, but this time the
384 * added 384 * page is kept locked, so there's no chance of another
385 * race with truncate/invalidate.
385 */ 386 */
386 if (!page->mapping) { 387 if (!page->mapping) {
387 unlock_page(page); 388 unlock_page(page);
388 break; 389 page = find_or_create_page(mapping, index,
390 mapping_gfp_mask(mapping));
391
392 if (!page) {
393 error = -ENOMEM;
394 break;
395 }
396 page_cache_release(pages[page_nr]);
397 pages[page_nr] = page;
389 } 398 }
390 /* 399 /*
391 * page was already under io and is now done, great 400 * page was already under io and is now done, great
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index a3522727ea5b..b546ba69be82 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -645,7 +645,7 @@ static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir,
645 if (len == 0) 645 if (len == 0)
646 return -ENOENT; 646 return -ENOENT;
647 647
648 slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_KERNEL); 648 slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS);
649 if (slots == NULL) 649 if (slots == NULL)
650 return -ENOMEM; 650 return -ENOMEM;
651 651
@@ -687,7 +687,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
687 struct dentry *alias; 687 struct dentry *alias;
688 int err, table; 688 int err, table;
689 689
690 lock_kernel(); 690 lock_super(sb);
691 table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; 691 table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0;
692 dentry->d_op = &vfat_dentry_ops[table]; 692 dentry->d_op = &vfat_dentry_ops[table];
693 693
@@ -699,7 +699,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
699 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 699 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
700 brelse(sinfo.bh); 700 brelse(sinfo.bh);
701 if (IS_ERR(inode)) { 701 if (IS_ERR(inode)) {
702 unlock_kernel(); 702 unlock_super(sb);
703 return ERR_CAST(inode); 703 return ERR_CAST(inode);
704 } 704 }
705 alias = d_find_alias(inode); 705 alias = d_find_alias(inode);
@@ -708,13 +708,13 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
708 dput(alias); 708 dput(alias);
709 else { 709 else {
710 iput(inode); 710 iput(inode);
711 unlock_kernel(); 711 unlock_super(sb);
712 return alias; 712 return alias;
713 } 713 }
714 714
715 } 715 }
716error: 716error:
717 unlock_kernel(); 717 unlock_super(sb);
718 dentry->d_op = &vfat_dentry_ops[table]; 718 dentry->d_op = &vfat_dentry_ops[table];
719 dentry->d_time = dentry->d_parent->d_inode->i_version; 719 dentry->d_time = dentry->d_parent->d_inode->i_version;
720 dentry = d_splice_alias(inode, dentry); 720 dentry = d_splice_alias(inode, dentry);
@@ -734,7 +734,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
734 struct timespec ts; 734 struct timespec ts;
735 int err; 735 int err;
736 736
737 lock_kernel(); 737 lock_super(sb);
738 738
739 ts = CURRENT_TIME_SEC; 739 ts = CURRENT_TIME_SEC;
740 err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); 740 err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo);
@@ -755,17 +755,18 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
755 dentry->d_time = dentry->d_parent->d_inode->i_version; 755 dentry->d_time = dentry->d_parent->d_inode->i_version;
756 d_instantiate(dentry, inode); 756 d_instantiate(dentry, inode);
757out: 757out:
758 unlock_kernel(); 758 unlock_super(sb);
759 return err; 759 return err;
760} 760}
761 761
762static int vfat_rmdir(struct inode *dir, struct dentry *dentry) 762static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
763{ 763{
764 struct inode *inode = dentry->d_inode; 764 struct inode *inode = dentry->d_inode;
765 struct super_block *sb = dir->i_sb;
765 struct fat_slot_info sinfo; 766 struct fat_slot_info sinfo;
766 int err; 767 int err;
767 768
768 lock_kernel(); 769 lock_super(sb);
769 770
770 err = fat_dir_empty(inode); 771 err = fat_dir_empty(inode);
771 if (err) 772 if (err)
@@ -783,7 +784,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
783 inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; 784 inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
784 fat_detach(inode); 785 fat_detach(inode);
785out: 786out:
786 unlock_kernel(); 787 unlock_super(sb);
787 788
788 return err; 789 return err;
789} 790}
@@ -791,10 +792,11 @@ out:
791static int vfat_unlink(struct inode *dir, struct dentry *dentry) 792static int vfat_unlink(struct inode *dir, struct dentry *dentry)
792{ 793{
793 struct inode *inode = dentry->d_inode; 794 struct inode *inode = dentry->d_inode;
795 struct super_block *sb = dir->i_sb;
794 struct fat_slot_info sinfo; 796 struct fat_slot_info sinfo;
795 int err; 797 int err;
796 798
797 lock_kernel(); 799 lock_super(sb);
798 800
799 err = vfat_find(dir, &dentry->d_name, &sinfo); 801 err = vfat_find(dir, &dentry->d_name, &sinfo);
800 if (err) 802 if (err)
@@ -807,7 +809,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry)
807 inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; 809 inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
808 fat_detach(inode); 810 fat_detach(inode);
809out: 811out:
810 unlock_kernel(); 812 unlock_super(sb);
811 813
812 return err; 814 return err;
813} 815}
@@ -820,7 +822,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
820 struct timespec ts; 822 struct timespec ts;
821 int err, cluster; 823 int err, cluster;
822 824
823 lock_kernel(); 825 lock_super(sb);
824 826
825 ts = CURRENT_TIME_SEC; 827 ts = CURRENT_TIME_SEC;
826 cluster = fat_alloc_new_dir(dir, &ts); 828 cluster = fat_alloc_new_dir(dir, &ts);
@@ -849,13 +851,13 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
849 dentry->d_time = dentry->d_parent->d_inode->i_version; 851 dentry->d_time = dentry->d_parent->d_inode->i_version;
850 d_instantiate(dentry, inode); 852 d_instantiate(dentry, inode);
851 853
852 unlock_kernel(); 854 unlock_super(sb);
853 return 0; 855 return 0;
854 856
855out_free: 857out_free:
856 fat_free_clusters(dir, cluster); 858 fat_free_clusters(dir, cluster);
857out: 859out:
858 unlock_kernel(); 860 unlock_super(sb);
859 return err; 861 return err;
860} 862}
861 863
@@ -869,11 +871,12 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
869 struct timespec ts; 871 struct timespec ts;
870 loff_t dotdot_i_pos, new_i_pos; 872 loff_t dotdot_i_pos, new_i_pos;
871 int err, is_dir, update_dotdot, corrupt = 0; 873 int err, is_dir, update_dotdot, corrupt = 0;
874 struct super_block *sb = old_dir->i_sb;
872 875
873 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; 876 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
874 old_inode = old_dentry->d_inode; 877 old_inode = old_dentry->d_inode;
875 new_inode = new_dentry->d_inode; 878 new_inode = new_dentry->d_inode;
876 lock_kernel(); 879 lock_super(sb);
877 err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); 880 err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo);
878 if (err) 881 if (err)
879 goto out; 882 goto out;
@@ -951,7 +954,7 @@ out:
951 brelse(sinfo.bh); 954 brelse(sinfo.bh);
952 brelse(dotdot_bh); 955 brelse(dotdot_bh);
953 brelse(old_sinfo.bh); 956 brelse(old_sinfo.bh);
954 unlock_kernel(); 957 unlock_super(sb);
955 958
956 return err; 959 return err;
957 960