aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/inode.c14
-rw-r--r--fs/affs/file.c101
-rw-r--r--fs/bfs/file.c12
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/buffer.c723
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/file.c41
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c989
-rw-r--r--fs/ecryptfs/debug.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h183
-rw-r--r--fs/ecryptfs/file.c97
-rw-r--r--fs/ecryptfs/inode.c231
-rw-r--r--fs/ecryptfs/keystore.c1078
-rw-r--r--fs/ecryptfs/main.c260
-rw-r--r--fs/ecryptfs/messaging.c5
-rw-r--r--fs/ecryptfs/mmap.c704
-rw-r--r--fs/ecryptfs/read_write.c358
-rw-r--r--fs/ecryptfs/super.c39
-rw-r--r--fs/ext2/dir.c55
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/inode.c40
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext3/inode.c163
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/inode.c174
-rw-r--r--fs/fat/inode.c27
-rw-r--r--fs/fuse/file.c46
-rw-r--r--fs/gfs2/ops_address.c211
-rw-r--r--fs/hfs/extent.c19
-rw-r--r--fs/hfs/inode.c18
-rw-r--r--fs/hfsplus/extents.c21
-rw-r--r--fs/hfsplus/inode.c18
-rw-r--r--fs/hostfs/hostfs.h9
-rw-r--r--fs/hostfs/hostfs_kern.c301
-rw-r--r--fs/hostfs/hostfs_user.c141
-rw-r--r--fs/hpfs/file.c18
-rw-r--r--fs/hugetlbfs/inode.c149
-rw-r--r--fs/inode.c18
-rw-r--r--fs/jbd/journal.c4
-rw-r--r--fs/jbd/revoke.c6
-rw-r--r--fs/jffs2/file.c105
-rw-r--r--fs/jfs/inode.c16
-rw-r--r--fs/libfs.c44
-rw-r--r--fs/minix/dir.c49
-rw-r--r--fs/minix/inode.c23
-rw-r--r--fs/minix/minix.h3
-rw-r--r--fs/mpage.c10
-rw-r--r--fs/namei.c46
-rw-r--r--fs/nfs/file.c80
-rw-r--r--fs/nfsd/export.c17
-rw-r--r--fs/ocfs2/aops.c14
-rw-r--r--fs/ocfs2/aops.h8
-rw-r--r--fs/ocfs2/file.c266
-rw-r--r--fs/proc/base.c13
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/proc_misc.c14
-rw-r--r--fs/qnx4/inode.c19
-rw-r--r--fs/ramfs/file-mmu.c4
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/reiserfs/file.c1240
-rw-r--r--fs/reiserfs/inode.c187
-rw-r--r--fs/reiserfs/ioctl.c10
-rw-r--r--fs/reiserfs/xattr.c16
-rw-r--r--fs/smbfs/file.c32
-rw-r--r--fs/splice.c71
-rw-r--r--fs/sysfs/inode.c4
-rw-r--r--fs/sysv/dir.c50
-rw-r--r--fs/sysv/itree.c23
-rw-r--r--fs/sysv/sysv.h3
-rw-r--r--fs/udf/file.c35
-rw-r--r--fs/udf/inode.c13
-rw-r--r--fs/ufs/dir.c55
-rw-r--r--fs/ufs/inode.c23
-rw-r--r--fs/ufs/util.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c35
81 files changed, 4290 insertions, 4588 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 7e7a04be1278..e647200262a2 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -61,10 +61,14 @@ static int adfs_readpage(struct file *file, struct page *page)
61 return block_read_full_page(page, adfs_get_block); 61 return block_read_full_page(page, adfs_get_block);
62} 62}
63 63
64static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to) 64static int adfs_write_begin(struct file *file, struct address_space *mapping,
65 loff_t pos, unsigned len, unsigned flags,
66 struct page **pagep, void **fsdata)
65{ 67{
66 return cont_prepare_write(page, from, to, adfs_get_block, 68 *pagep = NULL;
67 &ADFS_I(page->mapping->host)->mmu_private); 69 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
70 adfs_get_block,
71 &ADFS_I(mapping->host)->mmu_private);
68} 72}
69 73
70static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) 74static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -76,8 +80,8 @@ static const struct address_space_operations adfs_aops = {
76 .readpage = adfs_readpage, 80 .readpage = adfs_readpage,
77 .writepage = adfs_writepage, 81 .writepage = adfs_writepage,
78 .sync_page = block_sync_page, 82 .sync_page = block_sync_page,
79 .prepare_write = adfs_prepare_write, 83 .write_begin = adfs_write_begin,
80 .commit_write = generic_commit_write, 84 .write_end = generic_write_end,
81 .bmap = _adfs_bmap 85 .bmap = _adfs_bmap
82}; 86};
83 87
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c314a35f0918..6e0c9399200e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -395,25 +395,33 @@ static int affs_writepage(struct page *page, struct writeback_control *wbc)
395{ 395{
396 return block_write_full_page(page, affs_get_block, wbc); 396 return block_write_full_page(page, affs_get_block, wbc);
397} 397}
398
398static int affs_readpage(struct file *file, struct page *page) 399static int affs_readpage(struct file *file, struct page *page)
399{ 400{
400 return block_read_full_page(page, affs_get_block); 401 return block_read_full_page(page, affs_get_block);
401} 402}
402static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 403
404static int affs_write_begin(struct file *file, struct address_space *mapping,
405 loff_t pos, unsigned len, unsigned flags,
406 struct page **pagep, void **fsdata)
403{ 407{
404 return cont_prepare_write(page, from, to, affs_get_block, 408 *pagep = NULL;
405 &AFFS_I(page->mapping->host)->mmu_private); 409 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
410 affs_get_block,
411 &AFFS_I(mapping->host)->mmu_private);
406} 412}
413
407static sector_t _affs_bmap(struct address_space *mapping, sector_t block) 414static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
408{ 415{
409 return generic_block_bmap(mapping,block,affs_get_block); 416 return generic_block_bmap(mapping,block,affs_get_block);
410} 417}
418
411const struct address_space_operations affs_aops = { 419const struct address_space_operations affs_aops = {
412 .readpage = affs_readpage, 420 .readpage = affs_readpage,
413 .writepage = affs_writepage, 421 .writepage = affs_writepage,
414 .sync_page = block_sync_page, 422 .sync_page = block_sync_page,
415 .prepare_write = affs_prepare_write, 423 .write_begin = affs_write_begin,
416 .commit_write = generic_commit_write, 424 .write_end = generic_write_end,
417 .bmap = _affs_bmap 425 .bmap = _affs_bmap
418}; 426};
419 427
@@ -603,54 +611,65 @@ affs_readpage_ofs(struct file *file, struct page *page)
603 return err; 611 return err;
604} 612}
605 613
606static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) 614static int affs_write_begin_ofs(struct file *file, struct address_space *mapping,
615 loff_t pos, unsigned len, unsigned flags,
616 struct page **pagep, void **fsdata)
607{ 617{
608 struct inode *inode = page->mapping->host; 618 struct inode *inode = mapping->host;
609 u32 size, offset; 619 struct page *page;
610 u32 tmp; 620 pgoff_t index;
611 int err = 0; 621 int err = 0;
612 622
613 pr_debug("AFFS: prepare_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 623 pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
614 offset = page->index << PAGE_CACHE_SHIFT; 624 if (pos > AFFS_I(inode)->mmu_private) {
615 if (offset + from > AFFS_I(inode)->mmu_private) { 625 /* XXX: this probably leaves a too-big i_size in case of
616 err = affs_extent_file_ofs(inode, offset + from); 626 * failure. Should really be updating i_size at write_end time
627 */
628 err = affs_extent_file_ofs(inode, pos);
617 if (err) 629 if (err)
618 return err; 630 return err;
619 } 631 }
620 size = inode->i_size; 632
633 index = pos >> PAGE_CACHE_SHIFT;
634 page = __grab_cache_page(mapping, index);
635 if (!page)
636 return -ENOMEM;
637 *pagep = page;
621 638
622 if (PageUptodate(page)) 639 if (PageUptodate(page))
623 return 0; 640 return 0;
624 641
625 if (from) { 642 /* XXX: inefficient but safe in the face of short writes */
626 err = affs_do_readpage_ofs(file, page, 0, from); 643 err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE);
627 if (err) 644 if (err) {
628 return err; 645 unlock_page(page);
629 } 646 page_cache_release(page);
630 if (to < PAGE_CACHE_SIZE) {
631 zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0);
632 if (size > offset + to) {
633 if (size < offset + PAGE_CACHE_SIZE)
634 tmp = size & ~PAGE_CACHE_MASK;
635 else
636 tmp = PAGE_CACHE_SIZE;
637 err = affs_do_readpage_ofs(file, page, to, tmp);
638 }
639 } 647 }
640 return err; 648 return err;
641} 649}
642 650
643static int affs_commit_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) 651static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
652 loff_t pos, unsigned len, unsigned copied,
653 struct page *page, void *fsdata)
644{ 654{
645 struct inode *inode = page->mapping->host; 655 struct inode *inode = mapping->host;
646 struct super_block *sb = inode->i_sb; 656 struct super_block *sb = inode->i_sb;
647 struct buffer_head *bh, *prev_bh; 657 struct buffer_head *bh, *prev_bh;
648 char *data; 658 char *data;
649 u32 bidx, boff, bsize; 659 u32 bidx, boff, bsize;
660 unsigned from, to;
650 u32 tmp; 661 u32 tmp;
651 int written; 662 int written;
652 663
653 pr_debug("AFFS: commit_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 664 from = pos & (PAGE_CACHE_SIZE - 1);
665 to = pos + len;
666 /*
667 * XXX: not sure if this can handle short copies (copied < len), but
668 * we don't have to, because the page should always be uptodate here,
669 * due to write_begin.
670 */
671
672 pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
654 bsize = AFFS_SB(sb)->s_data_blksize; 673 bsize = AFFS_SB(sb)->s_data_blksize;
655 data = page_address(page); 674 data = page_address(page);
656 675
@@ -748,6 +767,9 @@ done:
748 if (tmp > inode->i_size) 767 if (tmp > inode->i_size)
749 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 768 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
750 769
770 unlock_page(page);
771 page_cache_release(page);
772
751 return written; 773 return written;
752 774
753out: 775out:
@@ -761,8 +783,8 @@ const struct address_space_operations affs_aops_ofs = {
761 .readpage = affs_readpage_ofs, 783 .readpage = affs_readpage_ofs,
762 //.writepage = affs_writepage_ofs, 784 //.writepage = affs_writepage_ofs,
763 //.sync_page = affs_sync_page_ofs, 785 //.sync_page = affs_sync_page_ofs,
764 .prepare_write = affs_prepare_write_ofs, 786 .write_begin = affs_write_begin_ofs,
765 .commit_write = affs_commit_write_ofs 787 .write_end = affs_write_end_ofs
766}; 788};
767 789
768/* Free any preallocated blocks. */ 790/* Free any preallocated blocks. */
@@ -805,18 +827,13 @@ affs_truncate(struct inode *inode)
805 if (inode->i_size > AFFS_I(inode)->mmu_private) { 827 if (inode->i_size > AFFS_I(inode)->mmu_private) {
806 struct address_space *mapping = inode->i_mapping; 828 struct address_space *mapping = inode->i_mapping;
807 struct page *page; 829 struct page *page;
808 u32 size = inode->i_size - 1; 830 void *fsdata;
831 u32 size = inode->i_size;
809 int res; 832 int res;
810 833
811 page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); 834 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
812 if (!page)
813 return;
814 size = (size & (PAGE_CACHE_SIZE - 1)) + 1;
815 res = mapping->a_ops->prepare_write(NULL, page, size, size);
816 if (!res) 835 if (!res)
817 res = mapping->a_ops->commit_write(NULL, page, size, size); 836 res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
818 unlock_page(page);
819 page_cache_release(page);
820 mark_inode_dirty(inode); 837 mark_inode_dirty(inode);
821 return; 838 return;
822 } else if (inode->i_size == AFFS_I(inode)->mmu_private) 839 } else if (inode->i_size == AFFS_I(inode)->mmu_private)
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 24310e9ee05a..911b4ccf470f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -145,9 +145,13 @@ static int bfs_readpage(struct file *file, struct page *page)
145 return block_read_full_page(page, bfs_get_block); 145 return block_read_full_page(page, bfs_get_block);
146} 146}
147 147
148static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 148static int bfs_write_begin(struct file *file, struct address_space *mapping,
149 loff_t pos, unsigned len, unsigned flags,
150 struct page **pagep, void **fsdata)
149{ 151{
150 return block_prepare_write(page, from, to, bfs_get_block); 152 *pagep = NULL;
153 return block_write_begin(file, mapping, pos, len, flags,
154 pagep, fsdata, bfs_get_block);
151} 155}
152 156
153static sector_t bfs_bmap(struct address_space *mapping, sector_t block) 157static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
@@ -159,8 +163,8 @@ const struct address_space_operations bfs_aops = {
159 .readpage = bfs_readpage, 163 .readpage = bfs_readpage,
160 .writepage = bfs_writepage, 164 .writepage = bfs_writepage,
161 .sync_page = block_sync_page, 165 .sync_page = block_sync_page,
162 .prepare_write = bfs_prepare_write, 166 .write_begin = bfs_write_begin,
163 .commit_write = generic_commit_write, 167 .write_end = generic_write_end,
164 .bmap = bfs_bmap, 168 .bmap = bfs_bmap,
165}; 169};
166 170
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b1013f34085d..f3037c645ca9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1725,7 +1725,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1725 &page, &vma) <= 0) { 1725 &page, &vma) <= 0) {
1726 DUMP_SEEK(PAGE_SIZE); 1726 DUMP_SEEK(PAGE_SIZE);
1727 } else { 1727 } else {
1728 if (page == ZERO_PAGE(addr)) { 1728 if (page == ZERO_PAGE(0)) {
1729 if (!dump_seek(file, PAGE_SIZE)) { 1729 if (!dump_seek(file, PAGE_SIZE)) {
1730 page_cache_release(page); 1730 page_cache_release(page);
1731 goto end_coredump; 1731 goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2f5d8dbe676d..c5ca2f0aca7f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1488,7 +1488,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1488 &page, &vma) <= 0) { 1488 &page, &vma) <= 0) {
1489 DUMP_SEEK(file->f_pos + PAGE_SIZE); 1489 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1490 } 1490 }
1491 else if (page == ZERO_PAGE(addr)) { 1491 else if (page == ZERO_PAGE(0)) {
1492 page_cache_release(page); 1492 page_cache_release(page);
1493 DUMP_SEEK(file->f_pos + PAGE_SIZE); 1493 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1494 } 1494 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6339a30879b7..379a446e243e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -378,14 +378,26 @@ static int blkdev_readpage(struct file * file, struct page * page)
378 return block_read_full_page(page, blkdev_get_block); 378 return block_read_full_page(page, blkdev_get_block);
379} 379}
380 380
381static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 381static int blkdev_write_begin(struct file *file, struct address_space *mapping,
382 loff_t pos, unsigned len, unsigned flags,
383 struct page **pagep, void **fsdata)
382{ 384{
383 return block_prepare_write(page, from, to, blkdev_get_block); 385 *pagep = NULL;
386 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
387 blkdev_get_block);
384} 388}
385 389
386static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) 390static int blkdev_write_end(struct file *file, struct address_space *mapping,
391 loff_t pos, unsigned len, unsigned copied,
392 struct page *page, void *fsdata)
387{ 393{
388 return block_commit_write(page, from, to); 394 int ret;
395 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
396
397 unlock_page(page);
398 page_cache_release(page);
399
400 return ret;
389} 401}
390 402
391/* 403/*
@@ -1327,8 +1339,8 @@ const struct address_space_operations def_blk_aops = {
1327 .readpage = blkdev_readpage, 1339 .readpage = blkdev_readpage,
1328 .writepage = blkdev_writepage, 1340 .writepage = blkdev_writepage,
1329 .sync_page = block_sync_page, 1341 .sync_page = block_sync_page,
1330 .prepare_write = blkdev_prepare_write, 1342 .write_begin = blkdev_write_begin,
1331 .commit_write = blkdev_commit_write, 1343 .write_end = blkdev_write_end,
1332 .writepages = generic_writepages, 1344 .writepages = generic_writepages,
1333 .direct_IO = blkdev_direct_IO, 1345 .direct_IO = blkdev_direct_IO,
1334}; 1346};
diff --git a/fs/buffer.c b/fs/buffer.c
index 75b51dfa5e03..faceb5eecca9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -110,10 +110,14 @@ static void buffer_io_error(struct buffer_head *bh)
110} 110}
111 111
112/* 112/*
113 * Default synchronous end-of-IO handler.. Just mark it up-to-date and 113 * End-of-IO handler helper function which does not touch the bh after
114 * unlock the buffer. This is what ll_rw_block uses too. 114 * unlocking it.
115 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
116 * a race there is benign: unlock_buffer() only uses the bh's address for
117 * hashing after unlocking the buffer, so it doesn't actually touch the bh
118 * itself.
115 */ 119 */
116void end_buffer_read_sync(struct buffer_head *bh, int uptodate) 120static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
117{ 121{
118 if (uptodate) { 122 if (uptodate) {
119 set_buffer_uptodate(bh); 123 set_buffer_uptodate(bh);
@@ -122,6 +126,15 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
122 clear_buffer_uptodate(bh); 126 clear_buffer_uptodate(bh);
123 } 127 }
124 unlock_buffer(bh); 128 unlock_buffer(bh);
129}
130
131/*
132 * Default synchronous end-of-IO handler. Just mark it up-to-date and
133 * unlock the buffer. This is what ll_rw_block uses too.
134 */
135void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
136{
137 __end_buffer_read_notouch(bh, uptodate);
125 put_bh(bh); 138 put_bh(bh);
126} 139}
127 140
@@ -1757,6 +1770,48 @@ recover:
1757 goto done; 1770 goto done;
1758} 1771}
1759 1772
1773/*
1774 * If a page has any new buffers, zero them out here, and mark them uptodate
1775 * and dirty so they'll be written out (in order to prevent uninitialised
1776 * block data from leaking). And clear the new bit.
1777 */
1778void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1779{
1780 unsigned int block_start, block_end;
1781 struct buffer_head *head, *bh;
1782
1783 BUG_ON(!PageLocked(page));
1784 if (!page_has_buffers(page))
1785 return;
1786
1787 bh = head = page_buffers(page);
1788 block_start = 0;
1789 do {
1790 block_end = block_start + bh->b_size;
1791
1792 if (buffer_new(bh)) {
1793 if (block_end > from && block_start < to) {
1794 if (!PageUptodate(page)) {
1795 unsigned start, size;
1796
1797 start = max(from, block_start);
1798 size = min(to, block_end) - start;
1799
1800 zero_user_page(page, start, size, KM_USER0);
1801 set_buffer_uptodate(bh);
1802 }
1803
1804 clear_buffer_new(bh);
1805 mark_buffer_dirty(bh);
1806 }
1807 }
1808
1809 block_start = block_end;
1810 bh = bh->b_this_page;
1811 } while (bh != head);
1812}
1813EXPORT_SYMBOL(page_zero_new_buffers);
1814
1760static int __block_prepare_write(struct inode *inode, struct page *page, 1815static int __block_prepare_write(struct inode *inode, struct page *page,
1761 unsigned from, unsigned to, get_block_t *get_block) 1816 unsigned from, unsigned to, get_block_t *get_block)
1762{ 1817{
@@ -1800,7 +1855,9 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1800 unmap_underlying_metadata(bh->b_bdev, 1855 unmap_underlying_metadata(bh->b_bdev,
1801 bh->b_blocknr); 1856 bh->b_blocknr);
1802 if (PageUptodate(page)) { 1857 if (PageUptodate(page)) {
1858 clear_buffer_new(bh);
1803 set_buffer_uptodate(bh); 1859 set_buffer_uptodate(bh);
1860 mark_buffer_dirty(bh);
1804 continue; 1861 continue;
1805 } 1862 }
1806 if (block_end > to || block_start < from) { 1863 if (block_end > to || block_start < from) {
@@ -1839,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1839 if (!buffer_uptodate(*wait_bh)) 1896 if (!buffer_uptodate(*wait_bh))
1840 err = -EIO; 1897 err = -EIO;
1841 } 1898 }
1842 if (!err) { 1899 if (unlikely(err))
1843 bh = head; 1900 page_zero_new_buffers(page, from, to);
1844 do {
1845 if (buffer_new(bh))
1846 clear_buffer_new(bh);
1847 } while ((bh = bh->b_this_page) != head);
1848 return 0;
1849 }
1850 /* Error case: */
1851 /*
1852 * Zero out any newly allocated blocks to avoid exposing stale
1853 * data. If BH_New is set, we know that the block was newly
1854 * allocated in the above loop.
1855 */
1856 bh = head;
1857 block_start = 0;
1858 do {
1859 block_end = block_start+blocksize;
1860 if (block_end <= from)
1861 goto next_bh;
1862 if (block_start >= to)
1863 break;
1864 if (buffer_new(bh)) {
1865 clear_buffer_new(bh);
1866 zero_user_page(page, block_start, bh->b_size, KM_USER0);
1867 set_buffer_uptodate(bh);
1868 mark_buffer_dirty(bh);
1869 }
1870next_bh:
1871 block_start = block_end;
1872 bh = bh->b_this_page;
1873 } while (bh != head);
1874 return err; 1901 return err;
1875} 1902}
1876 1903
@@ -1895,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1895 set_buffer_uptodate(bh); 1922 set_buffer_uptodate(bh);
1896 mark_buffer_dirty(bh); 1923 mark_buffer_dirty(bh);
1897 } 1924 }
1925 clear_buffer_new(bh);
1898 } 1926 }
1899 1927
1900 /* 1928 /*
@@ -1909,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1909} 1937}
1910 1938
1911/* 1939/*
1940 * block_write_begin takes care of the basic task of block allocation and
1941 * bringing partial write blocks uptodate first.
1942 *
1943 * If *pagep is not NULL, then block_write_begin uses the locked page
1944 * at *pagep rather than allocating its own. In this case, the page will
1945 * not be unlocked or deallocated on failure.
1946 */
1947int block_write_begin(struct file *file, struct address_space *mapping,
1948 loff_t pos, unsigned len, unsigned flags,
1949 struct page **pagep, void **fsdata,
1950 get_block_t *get_block)
1951{
1952 struct inode *inode = mapping->host;
1953 int status = 0;
1954 struct page *page;
1955 pgoff_t index;
1956 unsigned start, end;
1957 int ownpage = 0;
1958
1959 index = pos >> PAGE_CACHE_SHIFT;
1960 start = pos & (PAGE_CACHE_SIZE - 1);
1961 end = start + len;
1962
1963 page = *pagep;
1964 if (page == NULL) {
1965 ownpage = 1;
1966 page = __grab_cache_page(mapping, index);
1967 if (!page) {
1968 status = -ENOMEM;
1969 goto out;
1970 }
1971 *pagep = page;
1972 } else
1973 BUG_ON(!PageLocked(page));
1974
1975 status = __block_prepare_write(inode, page, start, end, get_block);
1976 if (unlikely(status)) {
1977 ClearPageUptodate(page);
1978
1979 if (ownpage) {
1980 unlock_page(page);
1981 page_cache_release(page);
1982 *pagep = NULL;
1983
1984 /*
1985 * prepare_write() may have instantiated a few blocks
1986 * outside i_size. Trim these off again. Don't need
1987 * i_size_read because we hold i_mutex.
1988 */
1989 if (pos + len > inode->i_size)
1990 vmtruncate(inode, inode->i_size);
1991 }
1992 goto out;
1993 }
1994
1995out:
1996 return status;
1997}
1998EXPORT_SYMBOL(block_write_begin);
1999
2000int block_write_end(struct file *file, struct address_space *mapping,
2001 loff_t pos, unsigned len, unsigned copied,
2002 struct page *page, void *fsdata)
2003{
2004 struct inode *inode = mapping->host;
2005 unsigned start;
2006
2007 start = pos & (PAGE_CACHE_SIZE - 1);
2008
2009 if (unlikely(copied < len)) {
2010 /*
2011 * The buffers that were written will now be uptodate, so we
2012 * don't have to worry about a readpage reading them and
2013 * overwriting a partial write. However if we have encountered
2014 * a short write and only partially written into a buffer, it
2015 * will not be marked uptodate, so a readpage might come in and
2016 * destroy our partial write.
2017 *
2018 * Do the simplest thing, and just treat any short write to a
2019 * non uptodate page as a zero-length write, and force the
2020 * caller to redo the whole thing.
2021 */
2022 if (!PageUptodate(page))
2023 copied = 0;
2024
2025 page_zero_new_buffers(page, start+copied, start+len);
2026 }
2027 flush_dcache_page(page);
2028
2029 /* This could be a short (even 0-length) commit */
2030 __block_commit_write(inode, page, start, start+copied);
2031
2032 return copied;
2033}
2034EXPORT_SYMBOL(block_write_end);
2035
2036int generic_write_end(struct file *file, struct address_space *mapping,
2037 loff_t pos, unsigned len, unsigned copied,
2038 struct page *page, void *fsdata)
2039{
2040 struct inode *inode = mapping->host;
2041
2042 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2043
2044 /*
2045 * No need to use i_size_read() here, the i_size
2046 * cannot change under us because we hold i_mutex.
2047 *
2048 * But it's important to update i_size while still holding page lock:
2049 * page writeout could otherwise come in and zero beyond i_size.
2050 */
2051 if (pos+copied > inode->i_size) {
2052 i_size_write(inode, pos+copied);
2053 mark_inode_dirty(inode);
2054 }
2055
2056 unlock_page(page);
2057 page_cache_release(page);
2058
2059 return copied;
2060}
2061EXPORT_SYMBOL(generic_write_end);
2062
2063/*
1912 * Generic "read page" function for block devices that have the normal 2064 * Generic "read page" function for block devices that have the normal
1913 * get_block functionality. This is most of the block device filesystems. 2065 * get_block functionality. This is most of the block device filesystems.
1914 * Reads the page asynchronously --- the unlock_buffer() and 2066 * Reads the page asynchronously --- the unlock_buffer() and
@@ -2004,14 +2156,14 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
2004} 2156}
2005 2157
2006/* utility function for filesystems that need to do work on expanding 2158/* utility function for filesystems that need to do work on expanding
2007 * truncates. Uses prepare/commit_write to allow the filesystem to 2159 * truncates. Uses filesystem pagecache writes to allow the filesystem to
2008 * deal with the hole. 2160 * deal with the hole.
2009 */ 2161 */
2010static int __generic_cont_expand(struct inode *inode, loff_t size, 2162int generic_cont_expand_simple(struct inode *inode, loff_t size)
2011 pgoff_t index, unsigned int offset)
2012{ 2163{
2013 struct address_space *mapping = inode->i_mapping; 2164 struct address_space *mapping = inode->i_mapping;
2014 struct page *page; 2165 struct page *page;
2166 void *fsdata;
2015 unsigned long limit; 2167 unsigned long limit;
2016 int err; 2168 int err;
2017 2169
@@ -2024,140 +2176,115 @@ static int __generic_cont_expand(struct inode *inode, loff_t size,
2024 if (size > inode->i_sb->s_maxbytes) 2176 if (size > inode->i_sb->s_maxbytes)
2025 goto out; 2177 goto out;
2026 2178
2027 err = -ENOMEM; 2179 err = pagecache_write_begin(NULL, mapping, size, 0,
2028 page = grab_cache_page(mapping, index); 2180 AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2029 if (!page) 2181 &page, &fsdata);
2030 goto out; 2182 if (err)
2031 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2032 if (err) {
2033 /*
2034 * ->prepare_write() may have instantiated a few blocks
2035 * outside i_size. Trim these off again.
2036 */
2037 unlock_page(page);
2038 page_cache_release(page);
2039 vmtruncate(inode, inode->i_size);
2040 goto out; 2183 goto out;
2041 }
2042 2184
2043 err = mapping->a_ops->commit_write(NULL, page, offset, offset); 2185 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2186 BUG_ON(err > 0);
2044 2187
2045 unlock_page(page);
2046 page_cache_release(page);
2047 if (err > 0)
2048 err = 0;
2049out: 2188out:
2050 return err; 2189 return err;
2051} 2190}
2052 2191
2053int generic_cont_expand(struct inode *inode, loff_t size) 2192int cont_expand_zero(struct file *file, struct address_space *mapping,
2193 loff_t pos, loff_t *bytes)
2054{ 2194{
2055 pgoff_t index; 2195 struct inode *inode = mapping->host;
2056 unsigned int offset; 2196 unsigned blocksize = 1 << inode->i_blkbits;
2197 struct page *page;
2198 void *fsdata;
2199 pgoff_t index, curidx;
2200 loff_t curpos;
2201 unsigned zerofrom, offset, len;
2202 int err = 0;
2057 2203
2058 offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ 2204 index = pos >> PAGE_CACHE_SHIFT;
2205 offset = pos & ~PAGE_CACHE_MASK;
2059 2206
2060 /* ugh. in prepare/commit_write, if from==to==start of block, we 2207 while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2061 ** skip the prepare. make sure we never send an offset for the start 2208 zerofrom = curpos & ~PAGE_CACHE_MASK;
2062 ** of a block 2209 if (zerofrom & (blocksize-1)) {
2063 */ 2210 *bytes |= (blocksize-1);
2064 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { 2211 (*bytes)++;
2065 /* caller must handle this extra byte. */ 2212 }
2066 offset++; 2213 len = PAGE_CACHE_SIZE - zerofrom;
2067 }
2068 index = size >> PAGE_CACHE_SHIFT;
2069 2214
2070 return __generic_cont_expand(inode, size, index, offset); 2215 err = pagecache_write_begin(file, mapping, curpos, len,
2071} 2216 AOP_FLAG_UNINTERRUPTIBLE,
2217 &page, &fsdata);
2218 if (err)
2219 goto out;
2220 zero_user_page(page, zerofrom, len, KM_USER0);
2221 err = pagecache_write_end(file, mapping, curpos, len, len,
2222 page, fsdata);
2223 if (err < 0)
2224 goto out;
2225 BUG_ON(err != len);
2226 err = 0;
2227 }
2072 2228
2073int generic_cont_expand_simple(struct inode *inode, loff_t size) 2229 /* page covers the boundary, find the boundary offset */
2074{ 2230 if (index == curidx) {
2075 loff_t pos = size - 1; 2231 zerofrom = curpos & ~PAGE_CACHE_MASK;
2076 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 2232 /* if we will expand the thing last block will be filled */
2077 unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; 2233 if (offset <= zerofrom) {
2234 goto out;
2235 }
2236 if (zerofrom & (blocksize-1)) {
2237 *bytes |= (blocksize-1);
2238 (*bytes)++;
2239 }
2240 len = offset - zerofrom;
2078 2241
2079 /* prepare/commit_write can handle even if from==to==start of block. */ 2242 err = pagecache_write_begin(file, mapping, curpos, len,
2080 return __generic_cont_expand(inode, size, index, offset); 2243 AOP_FLAG_UNINTERRUPTIBLE,
2244 &page, &fsdata);
2245 if (err)
2246 goto out;
2247 zero_user_page(page, zerofrom, len, KM_USER0);
2248 err = pagecache_write_end(file, mapping, curpos, len, len,
2249 page, fsdata);
2250 if (err < 0)
2251 goto out;
2252 BUG_ON(err != len);
2253 err = 0;
2254 }
2255out:
2256 return err;
2081} 2257}
2082 2258
2083/* 2259/*
2084 * For moronic filesystems that do not allow holes in file. 2260 * For moronic filesystems that do not allow holes in file.
2085 * We may have to extend the file. 2261 * We may have to extend the file.
2086 */ 2262 */
2087 2263int cont_write_begin(struct file *file, struct address_space *mapping,
2088int cont_prepare_write(struct page *page, unsigned offset, 2264 loff_t pos, unsigned len, unsigned flags,
2089 unsigned to, get_block_t *get_block, loff_t *bytes) 2265 struct page **pagep, void **fsdata,
2266 get_block_t *get_block, loff_t *bytes)
2090{ 2267{
2091 struct address_space *mapping = page->mapping;
2092 struct inode *inode = mapping->host; 2268 struct inode *inode = mapping->host;
2093 struct page *new_page;
2094 pgoff_t pgpos;
2095 long status;
2096 unsigned zerofrom;
2097 unsigned blocksize = 1 << inode->i_blkbits; 2269 unsigned blocksize = 1 << inode->i_blkbits;
2270 unsigned zerofrom;
2271 int err;
2098 2272
2099 while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { 2273 err = cont_expand_zero(file, mapping, pos, bytes);
2100 status = -ENOMEM; 2274 if (err)
2101 new_page = grab_cache_page(mapping, pgpos); 2275 goto out;
2102 if (!new_page)
2103 goto out;
2104 /* we might sleep */
2105 if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
2106 unlock_page(new_page);
2107 page_cache_release(new_page);
2108 continue;
2109 }
2110 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2111 if (zerofrom & (blocksize-1)) {
2112 *bytes |= (blocksize-1);
2113 (*bytes)++;
2114 }
2115 status = __block_prepare_write(inode, new_page, zerofrom,
2116 PAGE_CACHE_SIZE, get_block);
2117 if (status)
2118 goto out_unmap;
2119 zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
2120 KM_USER0);
2121 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
2122 unlock_page(new_page);
2123 page_cache_release(new_page);
2124 }
2125
2126 if (page->index < pgpos) {
2127 /* completely inside the area */
2128 zerofrom = offset;
2129 } else {
2130 /* page covers the boundary, find the boundary offset */
2131 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2132
2133 /* if we will expand the thing last block will be filled */
2134 if (to > zerofrom && (zerofrom & (blocksize-1))) {
2135 *bytes |= (blocksize-1);
2136 (*bytes)++;
2137 }
2138 2276
2139 /* starting below the boundary? Nothing to zero out */ 2277 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2140 if (offset <= zerofrom) 2278 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2141 zerofrom = offset; 2279 *bytes |= (blocksize-1);
2142 } 2280 (*bytes)++;
2143 status = __block_prepare_write(inode, page, zerofrom, to, get_block);
2144 if (status)
2145 goto out1;
2146 if (zerofrom < offset) {
2147 zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
2148 __block_commit_write(inode, page, zerofrom, offset);
2149 } 2281 }
2150 return 0;
2151out1:
2152 ClearPageUptodate(page);
2153 return status;
2154 2282
2155out_unmap: 2283 *pagep = NULL;
2156 ClearPageUptodate(new_page); 2284 err = block_write_begin(file, mapping, pos, len,
2157 unlock_page(new_page); 2285 flags, pagep, fsdata, get_block);
2158 page_cache_release(new_page);
2159out: 2286out:
2160 return status; 2287 return err;
2161} 2288}
2162 2289
2163int block_prepare_write(struct page *page, unsigned from, unsigned to, 2290int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2242,81 +2369,129 @@ out_unlock:
2242} 2369}
2243 2370
2244/* 2371/*
2245 * nobh_prepare_write()'s prereads are special: the buffer_heads are freed 2372 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
2246 * immediately, while under the page lock. So it needs a special end_io 2373 * immediately, while under the page lock. So it needs a special end_io
2247 * handler which does not touch the bh after unlocking it. 2374 * handler which does not touch the bh after unlocking it.
2248 *
2249 * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
2250 * a race there is benign: unlock_buffer() only use the bh's address for
2251 * hashing after unlocking the buffer, so it doesn't actually touch the bh
2252 * itself.
2253 */ 2375 */
2254static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) 2376static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2255{ 2377{
2256 if (uptodate) { 2378 __end_buffer_read_notouch(bh, uptodate);
2257 set_buffer_uptodate(bh); 2379}
2258 } else { 2380
2259 /* This happens, due to failed READA attempts. */ 2381/*
2260 clear_buffer_uptodate(bh); 2382 * Attach the singly-linked list of buffers created by nobh_write_begin, to
2261 } 2383 * the page (converting it to circular linked list and taking care of page
2262 unlock_buffer(bh); 2384 * dirty races).
2385 */
2386static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2387{
2388 struct buffer_head *bh;
2389
2390 BUG_ON(!PageLocked(page));
2391
2392 spin_lock(&page->mapping->private_lock);
2393 bh = head;
2394 do {
2395 if (PageDirty(page))
2396 set_buffer_dirty(bh);
2397 if (!bh->b_this_page)
2398 bh->b_this_page = head;
2399 bh = bh->b_this_page;
2400 } while (bh != head);
2401 attach_page_buffers(page, head);
2402 spin_unlock(&page->mapping->private_lock);
2263} 2403}
2264 2404
2265/* 2405/*
2266 * On entry, the page is fully not uptodate. 2406 * On entry, the page is fully not uptodate.
2267 * On exit the page is fully uptodate in the areas outside (from,to) 2407 * On exit the page is fully uptodate in the areas outside (from,to)
2268 */ 2408 */
2269int nobh_prepare_write(struct page *page, unsigned from, unsigned to, 2409int nobh_write_begin(struct file *file, struct address_space *mapping,
2410 loff_t pos, unsigned len, unsigned flags,
2411 struct page **pagep, void **fsdata,
2270 get_block_t *get_block) 2412 get_block_t *get_block)
2271{ 2413{
2272 struct inode *inode = page->mapping->host; 2414 struct inode *inode = mapping->host;
2273 const unsigned blkbits = inode->i_blkbits; 2415 const unsigned blkbits = inode->i_blkbits;
2274 const unsigned blocksize = 1 << blkbits; 2416 const unsigned blocksize = 1 << blkbits;
2275 struct buffer_head map_bh; 2417 struct buffer_head *head, *bh;
2276 struct buffer_head *read_bh[MAX_BUF_PER_PAGE]; 2418 struct page *page;
2419 pgoff_t index;
2420 unsigned from, to;
2277 unsigned block_in_page; 2421 unsigned block_in_page;
2278 unsigned block_start; 2422 unsigned block_start, block_end;
2279 sector_t block_in_file; 2423 sector_t block_in_file;
2280 char *kaddr; 2424 char *kaddr;
2281 int nr_reads = 0; 2425 int nr_reads = 0;
2282 int i;
2283 int ret = 0; 2426 int ret = 0;
2284 int is_mapped_to_disk = 1; 2427 int is_mapped_to_disk = 1;
2285 2428
2429 index = pos >> PAGE_CACHE_SHIFT;
2430 from = pos & (PAGE_CACHE_SIZE - 1);
2431 to = from + len;
2432
2433 page = __grab_cache_page(mapping, index);
2434 if (!page)
2435 return -ENOMEM;
2436 *pagep = page;
2437 *fsdata = NULL;
2438
2439 if (page_has_buffers(page)) {
2440 unlock_page(page);
2441 page_cache_release(page);
2442 *pagep = NULL;
2443 return block_write_begin(file, mapping, pos, len, flags, pagep,
2444 fsdata, get_block);
2445 }
2446
2286 if (PageMappedToDisk(page)) 2447 if (PageMappedToDisk(page))
2287 return 0; 2448 return 0;
2288 2449
2450 /*
2451 * Allocate buffers so that we can keep track of state, and potentially
2452 * attach them to the page if an error occurs. In the common case of
2453 * no error, they will just be freed again without ever being attached
2454 * to the page (which is all OK, because we're under the page lock).
2455 *
2456 * Be careful: the buffer linked list is a NULL terminated one, rather
2457 * than the circular one we're used to.
2458 */
2459 head = alloc_page_buffers(page, blocksize, 0);
2460 if (!head) {
2461 ret = -ENOMEM;
2462 goto out_release;
2463 }
2464
2289 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); 2465 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2290 map_bh.b_page = page;
2291 2466
2292 /* 2467 /*
2293 * We loop across all blocks in the page, whether or not they are 2468 * We loop across all blocks in the page, whether or not they are
2294 * part of the affected region. This is so we can discover if the 2469 * part of the affected region. This is so we can discover if the
2295 * page is fully mapped-to-disk. 2470 * page is fully mapped-to-disk.
2296 */ 2471 */
2297 for (block_start = 0, block_in_page = 0; 2472 for (block_start = 0, block_in_page = 0, bh = head;
2298 block_start < PAGE_CACHE_SIZE; 2473 block_start < PAGE_CACHE_SIZE;
2299 block_in_page++, block_start += blocksize) { 2474 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2300 unsigned block_end = block_start + blocksize;
2301 int create; 2475 int create;
2302 2476
2303 map_bh.b_state = 0; 2477 block_end = block_start + blocksize;
2478 bh->b_state = 0;
2304 create = 1; 2479 create = 1;
2305 if (block_start >= to) 2480 if (block_start >= to)
2306 create = 0; 2481 create = 0;
2307 map_bh.b_size = blocksize;
2308 ret = get_block(inode, block_in_file + block_in_page, 2482 ret = get_block(inode, block_in_file + block_in_page,
2309 &map_bh, create); 2483 bh, create);
2310 if (ret) 2484 if (ret)
2311 goto failed; 2485 goto failed;
2312 if (!buffer_mapped(&map_bh)) 2486 if (!buffer_mapped(bh))
2313 is_mapped_to_disk = 0; 2487 is_mapped_to_disk = 0;
2314 if (buffer_new(&map_bh)) 2488 if (buffer_new(bh))
2315 unmap_underlying_metadata(map_bh.b_bdev, 2489 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2316 map_bh.b_blocknr); 2490 if (PageUptodate(page)) {
2317 if (PageUptodate(page)) 2491 set_buffer_uptodate(bh);
2318 continue; 2492 continue;
2319 if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) { 2493 }
2494 if (buffer_new(bh) || !buffer_mapped(bh)) {
2320 kaddr = kmap_atomic(page, KM_USER0); 2495 kaddr = kmap_atomic(page, KM_USER0);
2321 if (block_start < from) 2496 if (block_start < from)
2322 memset(kaddr+block_start, 0, from-block_start); 2497 memset(kaddr+block_start, 0, from-block_start);
@@ -2326,49 +2501,26 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2326 kunmap_atomic(kaddr, KM_USER0); 2501 kunmap_atomic(kaddr, KM_USER0);
2327 continue; 2502 continue;
2328 } 2503 }
2329 if (buffer_uptodate(&map_bh)) 2504 if (buffer_uptodate(bh))
2330 continue; /* reiserfs does this */ 2505 continue; /* reiserfs does this */
2331 if (block_start < from || block_end > to) { 2506 if (block_start < from || block_end > to) {
2332 struct buffer_head *bh = alloc_buffer_head(GFP_NOFS); 2507 lock_buffer(bh);
2333 2508 bh->b_end_io = end_buffer_read_nobh;
2334 if (!bh) { 2509 submit_bh(READ, bh);
2335 ret = -ENOMEM; 2510 nr_reads++;
2336 goto failed;
2337 }
2338 bh->b_state = map_bh.b_state;
2339 atomic_set(&bh->b_count, 0);
2340 bh->b_this_page = NULL;
2341 bh->b_page = page;
2342 bh->b_blocknr = map_bh.b_blocknr;
2343 bh->b_size = blocksize;
2344 bh->b_data = (char *)(long)block_start;
2345 bh->b_bdev = map_bh.b_bdev;
2346 bh->b_private = NULL;
2347 read_bh[nr_reads++] = bh;
2348 } 2511 }
2349 } 2512 }
2350 2513
2351 if (nr_reads) { 2514 if (nr_reads) {
2352 struct buffer_head *bh;
2353
2354 /* 2515 /*
2355 * The page is locked, so these buffers are protected from 2516 * The page is locked, so these buffers are protected from
2356 * any VM or truncate activity. Hence we don't need to care 2517 * any VM or truncate activity. Hence we don't need to care
2357 * for the buffer_head refcounts. 2518 * for the buffer_head refcounts.
2358 */ 2519 */
2359 for (i = 0; i < nr_reads; i++) { 2520 for (bh = head; bh; bh = bh->b_this_page) {
2360 bh = read_bh[i];
2361 lock_buffer(bh);
2362 bh->b_end_io = end_buffer_read_nobh;
2363 submit_bh(READ, bh);
2364 }
2365 for (i = 0; i < nr_reads; i++) {
2366 bh = read_bh[i];
2367 wait_on_buffer(bh); 2521 wait_on_buffer(bh);
2368 if (!buffer_uptodate(bh)) 2522 if (!buffer_uptodate(bh))
2369 ret = -EIO; 2523 ret = -EIO;
2370 free_buffer_head(bh);
2371 read_bh[i] = NULL;
2372 } 2524 }
2373 if (ret) 2525 if (ret)
2374 goto failed; 2526 goto failed;
@@ -2377,44 +2529,70 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2377 if (is_mapped_to_disk) 2529 if (is_mapped_to_disk)
2378 SetPageMappedToDisk(page); 2530 SetPageMappedToDisk(page);
2379 2531
2532 *fsdata = head; /* to be released by nobh_write_end */
2533
2380 return 0; 2534 return 0;
2381 2535
2382failed: 2536failed:
2383 for (i = 0; i < nr_reads; i++) { 2537 BUG_ON(!ret);
2384 if (read_bh[i])
2385 free_buffer_head(read_bh[i]);
2386 }
2387
2388 /* 2538 /*
2389 * Error recovery is pretty slack. Clear the page and mark it dirty 2539 * Error recovery is a bit difficult. We need to zero out blocks that
2390 * so we'll later zero out any blocks which _were_ allocated. 2540 * were newly allocated, and dirty them to ensure they get written out.
2541 * Buffers need to be attached to the page at this point, otherwise
2542 * the handling of potential IO errors during writeout would be hard
2543 * (could try doing synchronous writeout, but what if that fails too?)
2391 */ 2544 */
2392 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); 2545 attach_nobh_buffers(page, head);
2393 SetPageUptodate(page); 2546 page_zero_new_buffers(page, from, to);
2394 set_page_dirty(page); 2547
2548out_release:
2549 unlock_page(page);
2550 page_cache_release(page);
2551 *pagep = NULL;
2552
2553 if (pos + len > inode->i_size)
2554 vmtruncate(inode, inode->i_size);
2555
2395 return ret; 2556 return ret;
2396} 2557}
2397EXPORT_SYMBOL(nobh_prepare_write); 2558EXPORT_SYMBOL(nobh_write_begin);
2398 2559
2399/* 2560int nobh_write_end(struct file *file, struct address_space *mapping,
2400 * Make sure any changes to nobh_commit_write() are reflected in 2561 loff_t pos, unsigned len, unsigned copied,
2401 * nobh_truncate_page(), since it doesn't call commit_write(). 2562 struct page *page, void *fsdata)
2402 */
2403int nobh_commit_write(struct file *file, struct page *page,
2404 unsigned from, unsigned to)
2405{ 2563{
2406 struct inode *inode = page->mapping->host; 2564 struct inode *inode = page->mapping->host;
2407 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2565 struct buffer_head *head = NULL;
2566 struct buffer_head *bh;
2567
2568 if (!PageMappedToDisk(page)) {
2569 if (unlikely(copied < len) && !page_has_buffers(page))
2570 attach_nobh_buffers(page, head);
2571 if (page_has_buffers(page))
2572 return generic_write_end(file, mapping, pos, len,
2573 copied, page, fsdata);
2574 }
2408 2575
2409 SetPageUptodate(page); 2576 SetPageUptodate(page);
2410 set_page_dirty(page); 2577 set_page_dirty(page);
2411 if (pos > inode->i_size) { 2578 if (pos+copied > inode->i_size) {
2412 i_size_write(inode, pos); 2579 i_size_write(inode, pos+copied);
2413 mark_inode_dirty(inode); 2580 mark_inode_dirty(inode);
2414 } 2581 }
2415 return 0; 2582
2583 unlock_page(page);
2584 page_cache_release(page);
2585
2586 head = fsdata;
2587 while (head) {
2588 bh = head;
2589 head = head->b_this_page;
2590 free_buffer_head(bh);
2591 }
2592
2593 return copied;
2416} 2594}
2417EXPORT_SYMBOL(nobh_commit_write); 2595EXPORT_SYMBOL(nobh_write_end);
2418 2596
2419/* 2597/*
2420 * nobh_writepage() - based on block_full_write_page() except 2598 * nobh_writepage() - based on block_full_write_page() except
@@ -2467,44 +2645,79 @@ out:
2467} 2645}
2468EXPORT_SYMBOL(nobh_writepage); 2646EXPORT_SYMBOL(nobh_writepage);
2469 2647
2470/* 2648int nobh_truncate_page(struct address_space *mapping,
2471 * This function assumes that ->prepare_write() uses nobh_prepare_write(). 2649 loff_t from, get_block_t *get_block)
2472 */
2473int nobh_truncate_page(struct address_space *mapping, loff_t from)
2474{ 2650{
2475 struct inode *inode = mapping->host;
2476 unsigned blocksize = 1 << inode->i_blkbits;
2477 pgoff_t index = from >> PAGE_CACHE_SHIFT; 2651 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2478 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2652 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2479 unsigned to; 2653 unsigned blocksize;
2654 sector_t iblock;
2655 unsigned length, pos;
2656 struct inode *inode = mapping->host;
2480 struct page *page; 2657 struct page *page;
2481 const struct address_space_operations *a_ops = mapping->a_ops; 2658 struct buffer_head map_bh;
2482 int ret = 0; 2659 int err;
2483 2660
2484 if ((offset & (blocksize - 1)) == 0) 2661 blocksize = 1 << inode->i_blkbits;
2485 goto out; 2662 length = offset & (blocksize - 1);
2663
2664 /* Block boundary? Nothing to do */
2665 if (!length)
2666 return 0;
2667
2668 length = blocksize - length;
2669 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2486 2670
2487 ret = -ENOMEM;
2488 page = grab_cache_page(mapping, index); 2671 page = grab_cache_page(mapping, index);
2672 err = -ENOMEM;
2489 if (!page) 2673 if (!page)
2490 goto out; 2674 goto out;
2491 2675
2492 to = (offset + blocksize) & ~(blocksize - 1); 2676 if (page_has_buffers(page)) {
2493 ret = a_ops->prepare_write(NULL, page, offset, to); 2677has_buffers:
2494 if (ret == 0) { 2678 unlock_page(page);
2495 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, 2679 page_cache_release(page);
2496 KM_USER0); 2680 return block_truncate_page(mapping, from, get_block);
2497 /*
2498 * It would be more correct to call aops->commit_write()
2499 * here, but this is more efficient.
2500 */
2501 SetPageUptodate(page);
2502 set_page_dirty(page);
2503 } 2681 }
2682
2683 /* Find the buffer that contains "offset" */
2684 pos = blocksize;
2685 while (offset >= pos) {
2686 iblock++;
2687 pos += blocksize;
2688 }
2689
2690 err = get_block(inode, iblock, &map_bh, 0);
2691 if (err)
2692 goto unlock;
2693 /* unmapped? It's a hole - nothing to do */
2694 if (!buffer_mapped(&map_bh))
2695 goto unlock;
2696
2697 /* Ok, it's mapped. Make sure it's up-to-date */
2698 if (!PageUptodate(page)) {
2699 err = mapping->a_ops->readpage(NULL, page);
2700 if (err) {
2701 page_cache_release(page);
2702 goto out;
2703 }
2704 lock_page(page);
2705 if (!PageUptodate(page)) {
2706 err = -EIO;
2707 goto unlock;
2708 }
2709 if (page_has_buffers(page))
2710 goto has_buffers;
2711 }
2712 zero_user_page(page, offset, length, KM_USER0);
2713 set_page_dirty(page);
2714 err = 0;
2715
2716unlock:
2504 unlock_page(page); 2717 unlock_page(page);
2505 page_cache_release(page); 2718 page_cache_release(page);
2506out: 2719out:
2507 return ret; 2720 return err;
2508} 2721}
2509EXPORT_SYMBOL(nobh_truncate_page); 2722EXPORT_SYMBOL(nobh_truncate_page);
2510 2723
@@ -2956,7 +3169,8 @@ static void recalc_bh_state(void)
2956 3169
2957struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 3170struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
2958{ 3171{
2959 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); 3172 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
3173 set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
2960 if (ret) { 3174 if (ret) {
2961 INIT_LIST_HEAD(&ret->b_assoc_buffers); 3175 INIT_LIST_HEAD(&ret->b_assoc_buffers);
2962 get_cpu_var(bh_accounting).nr++; 3176 get_cpu_var(bh_accounting).nr++;
@@ -3024,14 +3238,13 @@ EXPORT_SYMBOL(block_read_full_page);
3024EXPORT_SYMBOL(block_sync_page); 3238EXPORT_SYMBOL(block_sync_page);
3025EXPORT_SYMBOL(block_truncate_page); 3239EXPORT_SYMBOL(block_truncate_page);
3026EXPORT_SYMBOL(block_write_full_page); 3240EXPORT_SYMBOL(block_write_full_page);
3027EXPORT_SYMBOL(cont_prepare_write); 3241EXPORT_SYMBOL(cont_write_begin);
3028EXPORT_SYMBOL(end_buffer_read_sync); 3242EXPORT_SYMBOL(end_buffer_read_sync);
3029EXPORT_SYMBOL(end_buffer_write_sync); 3243EXPORT_SYMBOL(end_buffer_write_sync);
3030EXPORT_SYMBOL(file_fsync); 3244EXPORT_SYMBOL(file_fsync);
3031EXPORT_SYMBOL(fsync_bdev); 3245EXPORT_SYMBOL(fsync_bdev);
3032EXPORT_SYMBOL(generic_block_bmap); 3246EXPORT_SYMBOL(generic_block_bmap);
3033EXPORT_SYMBOL(generic_commit_write); 3247EXPORT_SYMBOL(generic_commit_write);
3034EXPORT_SYMBOL(generic_cont_expand);
3035EXPORT_SYMBOL(generic_cont_expand_simple); 3248EXPORT_SYMBOL(generic_cont_expand_simple);
3036EXPORT_SYMBOL(init_buffer); 3249EXPORT_SYMBOL(init_buffer);
3037EXPORT_SYMBOL(invalidate_bdev); 3250EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ddc003a9d214..dbd257d956c4 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -41,8 +41,8 @@ extern struct super_block * configfs_sb;
41 41
42static const struct address_space_operations configfs_aops = { 42static const struct address_space_operations configfs_aops = {
43 .readpage = simple_readpage, 43 .readpage = simple_readpage,
44 .prepare_write = simple_prepare_write, 44 .write_begin = simple_write_begin,
45 .commit_write = simple_commit_write 45 .write_end = simple_write_end,
46}; 46};
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
diff --git a/fs/dcache.c b/fs/dcache.c
index 678d39deb607..7da0cf50873e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -903,7 +903,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
903 struct dentry *dentry; 903 struct dentry *dentry;
904 char *dname; 904 char *dname;
905 905
906 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 906 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
907 if (!dentry) 907 if (!dentry)
908 return NULL; 908 return NULL;
909 909
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index a9b99c0dc2e7..fa6b7f7ff914 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -227,15 +227,24 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"
227 227
228DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); 228DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
229 229
230/** 230/*
231 * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value 231 * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
232 * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
233 * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
234 * 232 *
235 * These functions are exactly the same as the above functions, (but use a hex 233 * These functions are exactly the same as the above functions (but use a hex
236 * output for the decimal challenged) for details look at the above unsigned 234 * output for the decimal challenged). For details look at the above unsigned
237 * decimal functions. 235 * decimal functions.
238 */ 236 */
237
238/**
239 * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
240 * @name: a pointer to a string containing the name of the file to create.
241 * @mode: the permission that the file should have
242 * @parent: a pointer to the parent dentry for this file. This should be a
243 * directory dentry if set. If this parameter is %NULL, then the
244 * file will be created in the root of the debugfs filesystem.
245 * @value: a pointer to the variable that the file should read to and write
246 * from.
247 */
239struct dentry *debugfs_create_x8(const char *name, mode_t mode, 248struct dentry *debugfs_create_x8(const char *name, mode_t mode,
240 struct dentry *parent, u8 *value) 249 struct dentry *parent, u8 *value)
241{ 250{
@@ -243,6 +252,16 @@ struct dentry *debugfs_create_x8(const char *name, mode_t mode,
243} 252}
244EXPORT_SYMBOL_GPL(debugfs_create_x8); 253EXPORT_SYMBOL_GPL(debugfs_create_x8);
245 254
255/**
256 * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
257 * @name: a pointer to a string containing the name of the file to create.
258 * @mode: the permission that the file should have
259 * @parent: a pointer to the parent dentry for this file. This should be a
260 * directory dentry if set. If this parameter is %NULL, then the
261 * file will be created in the root of the debugfs filesystem.
262 * @value: a pointer to the variable that the file should read to and write
263 * from.
264 */
246struct dentry *debugfs_create_x16(const char *name, mode_t mode, 265struct dentry *debugfs_create_x16(const char *name, mode_t mode,
247 struct dentry *parent, u16 *value) 266 struct dentry *parent, u16 *value)
248{ 267{
@@ -250,6 +269,16 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
250} 269}
251EXPORT_SYMBOL_GPL(debugfs_create_x16); 270EXPORT_SYMBOL_GPL(debugfs_create_x16);
252 271
272/**
273 * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
274 * @name: a pointer to a string containing the name of the file to create.
275 * @mode: the permission that the file should have
276 * @parent: a pointer to the parent dentry for this file. This should be a
277 * directory dentry if set. If this parameter is %NULL, then the
278 * file will be created in the root of the debugfs filesystem.
279 * @value: a pointer to the variable that the file should read to and write
280 * from.
281 */
253struct dentry *debugfs_create_x32(const char *name, mode_t mode, 282struct dentry *debugfs_create_x32(const char *name, mode_t mode,
254 struct dentry *parent, u32 *value) 283 struct dentry *parent, u32 *value)
255{ 284{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b5928a7b6a5a..acf0da1bd257 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -163,7 +163,7 @@ static int dio_refill_pages(struct dio *dio)
163 up_read(&current->mm->mmap_sem); 163 up_read(&current->mm->mmap_sem);
164 164
165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { 165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166 struct page *page = ZERO_PAGE(dio->curr_user_address); 166 struct page *page = ZERO_PAGE(0);
167 /* 167 /*
168 * A memory fault, but the filesystem has some outstanding 168 * A memory fault, but the filesystem has some outstanding
169 * mapped blocks. We need to use those blocks up to avoid 169 * mapped blocks. We need to use those blocks up to avoid
@@ -763,7 +763,7 @@ static void dio_zero_block(struct dio *dio, int end)
763 763
764 this_chunk_bytes = this_chunk_blocks << dio->blkbits; 764 this_chunk_bytes = this_chunk_blocks << dio->blkbits;
765 765
766 page = ZERO_PAGE(dio->curr_user_address); 766 page = ZERO_PAGE(0);
767 if (submit_page_section(dio, page, 0, this_chunk_bytes, 767 if (submit_page_section(dio, page, 0, this_chunk_bytes,
768 dio->next_block_for_io)) 768 dio->next_block_for_io))
769 return; 769 return;
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1f1107237eab..768857015516 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 6ac630625b70..1ae90ef2c74d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -123,9 +123,9 @@ out:
123 return rc; 123 return rc;
124} 124}
125 125
126int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, 126static int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
127 char *cipher_name, 127 char *cipher_name,
128 char *chaining_modifier) 128 char *chaining_modifier)
129{ 129{
130 int cipher_name_len = strlen(cipher_name); 130 int cipher_name_len = strlen(cipher_name);
131 int chaining_modifier_len = strlen(chaining_modifier); 131 int chaining_modifier_len = strlen(chaining_modifier);
@@ -149,7 +149,7 @@ out:
149 * ecryptfs_derive_iv 149 * ecryptfs_derive_iv
150 * @iv: destination for the derived iv vale 150 * @iv: destination for the derived iv vale
151 * @crypt_stat: Pointer to crypt_stat struct for the current inode 151 * @crypt_stat: Pointer to crypt_stat struct for the current inode
152 * @offset: Offset of the page whose's iv we are to derive 152 * @offset: Offset of the extent whose IV we are to derive
153 * 153 *
154 * Generate the initialization vector from the given root IV and page 154 * Generate the initialization vector from the given root IV and page
155 * offset. 155 * offset.
@@ -157,7 +157,7 @@ out:
157 * Returns zero on success; non-zero on error. 157 * Returns zero on success; non-zero on error.
158 */ 158 */
159static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, 159static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
160 pgoff_t offset) 160 loff_t offset)
161{ 161{
162 int rc = 0; 162 int rc = 0;
163 char dst[MD5_DIGEST_SIZE]; 163 char dst[MD5_DIGEST_SIZE];
@@ -173,7 +173,7 @@ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
173 * hashing business. -Halcrow */ 173 * hashing business. -Halcrow */
174 memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); 174 memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
175 memset((src + crypt_stat->iv_bytes), 0, 16); 175 memset((src + crypt_stat->iv_bytes), 0, 16);
176 snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); 176 snprintf((src + crypt_stat->iv_bytes), 16, "%lld", offset);
177 if (unlikely(ecryptfs_verbosity > 0)) { 177 if (unlikely(ecryptfs_verbosity > 0)) {
178 ecryptfs_printk(KERN_DEBUG, "source:\n"); 178 ecryptfs_printk(KERN_DEBUG, "source:\n");
179 ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); 179 ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
@@ -204,6 +204,8 @@ void
204ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) 204ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
205{ 205{
206 memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); 206 memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
207 INIT_LIST_HEAD(&crypt_stat->keysig_list);
208 mutex_init(&crypt_stat->keysig_list_mutex);
207 mutex_init(&crypt_stat->cs_mutex); 209 mutex_init(&crypt_stat->cs_mutex);
208 mutex_init(&crypt_stat->cs_tfm_mutex); 210 mutex_init(&crypt_stat->cs_tfm_mutex);
209 mutex_init(&crypt_stat->cs_hash_tfm_mutex); 211 mutex_init(&crypt_stat->cs_hash_tfm_mutex);
@@ -211,27 +213,48 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
211} 213}
212 214
213/** 215/**
214 * ecryptfs_destruct_crypt_stat 216 * ecryptfs_destroy_crypt_stat
215 * @crypt_stat: Pointer to the crypt_stat struct to initialize. 217 * @crypt_stat: Pointer to the crypt_stat struct to initialize.
216 * 218 *
217 * Releases all memory associated with a crypt_stat struct. 219 * Releases all memory associated with a crypt_stat struct.
218 */ 220 */
219void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) 221void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
220{ 222{
223 struct ecryptfs_key_sig *key_sig, *key_sig_tmp;
224
221 if (crypt_stat->tfm) 225 if (crypt_stat->tfm)
222 crypto_free_blkcipher(crypt_stat->tfm); 226 crypto_free_blkcipher(crypt_stat->tfm);
223 if (crypt_stat->hash_tfm) 227 if (crypt_stat->hash_tfm)
224 crypto_free_hash(crypt_stat->hash_tfm); 228 crypto_free_hash(crypt_stat->hash_tfm);
229 mutex_lock(&crypt_stat->keysig_list_mutex);
230 list_for_each_entry_safe(key_sig, key_sig_tmp,
231 &crypt_stat->keysig_list, crypt_stat_list) {
232 list_del(&key_sig->crypt_stat_list);
233 kmem_cache_free(ecryptfs_key_sig_cache, key_sig);
234 }
235 mutex_unlock(&crypt_stat->keysig_list_mutex);
225 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); 236 memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
226} 237}
227 238
228void ecryptfs_destruct_mount_crypt_stat( 239void ecryptfs_destroy_mount_crypt_stat(
229 struct ecryptfs_mount_crypt_stat *mount_crypt_stat) 240 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
230{ 241{
231 if (mount_crypt_stat->global_auth_tok_key) 242 struct ecryptfs_global_auth_tok *auth_tok, *auth_tok_tmp;
232 key_put(mount_crypt_stat->global_auth_tok_key); 243
233 if (mount_crypt_stat->global_key_tfm) 244 if (!(mount_crypt_stat->flags & ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED))
234 crypto_free_blkcipher(mount_crypt_stat->global_key_tfm); 245 return;
246 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
247 list_for_each_entry_safe(auth_tok, auth_tok_tmp,
248 &mount_crypt_stat->global_auth_tok_list,
249 mount_crypt_stat_list) {
250 list_del(&auth_tok->mount_crypt_stat_list);
251 mount_crypt_stat->num_global_auth_toks--;
252 if (auth_tok->global_auth_tok_key
253 && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID))
254 key_put(auth_tok->global_auth_tok_key);
255 kmem_cache_free(ecryptfs_global_auth_tok_cache, auth_tok);
256 }
257 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
235 memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); 258 memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
236} 259}
237 260
@@ -330,114 +353,82 @@ out:
330 return rc; 353 return rc;
331} 354}
332 355
333static void 356/**
334ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx, 357 * ecryptfs_lower_offset_for_extent
335 int *byte_offset, 358 *
336 struct ecryptfs_crypt_stat *crypt_stat, 359 * Convert an eCryptfs page index into a lower byte offset
337 unsigned long extent_num) 360 */
361void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
362 struct ecryptfs_crypt_stat *crypt_stat)
338{ 363{
339 unsigned long lower_extent_num; 364 (*offset) = ((crypt_stat->extent_size
340 int extents_occupied_by_headers_at_front; 365 * crypt_stat->num_header_extents_at_front)
341 int bytes_occupied_by_headers_at_front; 366 + (crypt_stat->extent_size * extent_num));
342 int extent_offset;
343 int extents_per_page;
344
345 bytes_occupied_by_headers_at_front =
346 ( crypt_stat->header_extent_size
347 * crypt_stat->num_header_extents_at_front );
348 extents_occupied_by_headers_at_front =
349 ( bytes_occupied_by_headers_at_front
350 / crypt_stat->extent_size );
351 lower_extent_num = extents_occupied_by_headers_at_front + extent_num;
352 extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
353 (*lower_page_idx) = lower_extent_num / extents_per_page;
354 extent_offset = lower_extent_num % extents_per_page;
355 (*byte_offset) = extent_offset * crypt_stat->extent_size;
356 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = "
357 "[%d]\n", crypt_stat->header_extent_size);
358 ecryptfs_printk(KERN_DEBUG, " * crypt_stat->"
359 "num_header_extents_at_front = [%d]\n",
360 crypt_stat->num_header_extents_at_front);
361 ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_"
362 "front = [%d]\n", extents_occupied_by_headers_at_front);
363 ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n",
364 lower_extent_num);
365 ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n",
366 extents_per_page);
367 ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n",
368 (*lower_page_idx));
369 ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n",
370 extent_offset);
371 ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n",
372 (*byte_offset));
373} 367}
374 368
375static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, 369/**
376 struct page *lower_page, 370 * ecryptfs_encrypt_extent
377 struct inode *lower_inode, 371 * @enc_extent_page: Allocated page into which to encrypt the data in
378 int byte_offset_in_page, int bytes_to_write) 372 * @page
373 * @crypt_stat: crypt_stat containing cryptographic context for the
374 * encryption operation
375 * @page: Page containing plaintext data extent to encrypt
376 * @extent_offset: Page extent offset for use in generating IV
377 *
378 * Encrypts one extent of data.
379 *
380 * Return zero on success; non-zero otherwise
381 */
382static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
383 struct ecryptfs_crypt_stat *crypt_stat,
384 struct page *page,
385 unsigned long extent_offset)
379{ 386{
380 int rc = 0; 387 loff_t extent_base;
388 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
389 int rc;
381 390
382 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { 391 extent_base = (((loff_t)page->index)
383 rc = ecryptfs_commit_lower_page(lower_page, lower_inode, 392 * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
384 ctx->param.lower_file, 393 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
385 byte_offset_in_page, 394 (extent_base + extent_offset));
386 bytes_to_write); 395 if (rc) {
387 if (rc) { 396 ecryptfs_printk(KERN_ERR, "Error attempting to "
388 ecryptfs_printk(KERN_ERR, "Error calling lower " 397 "derive IV for extent [0x%.16x]; "
389 "commit; rc = [%d]\n", rc); 398 "rc = [%d]\n", (extent_base + extent_offset),
390 goto out; 399 rc);
391 } 400 goto out;
392 } else {
393 rc = ecryptfs_writepage_and_release_lower_page(lower_page,
394 lower_inode,
395 ctx->param.wbc);
396 if (rc) {
397 ecryptfs_printk(KERN_ERR, "Error calling lower "
398 "writepage(); rc = [%d]\n", rc);
399 goto out;
400 }
401 } 401 }
402out: 402 if (unlikely(ecryptfs_verbosity > 0)) {
403 return rc; 403 ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
404} 404 "with iv:\n");
405 405 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
406static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx, 406 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
407 struct page **lower_page, 407 "encryption:\n");
408 struct inode *lower_inode, 408 ecryptfs_dump_hex((char *)
409 unsigned long lower_page_idx, 409 (page_address(page)
410 int byte_offset_in_page) 410 + (extent_offset * crypt_stat->extent_size)),
411{ 411 8);
412 int rc = 0; 412 }
413 413 rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0,
414 if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { 414 page, (extent_offset
415 /* TODO: Limit this to only the data extents that are 415 * crypt_stat->extent_size),
416 * needed */ 416 crypt_stat->extent_size, extent_iv);
417 rc = ecryptfs_get_lower_page(lower_page, lower_inode, 417 if (rc < 0) {
418 ctx->param.lower_file, 418 printk(KERN_ERR "%s: Error attempting to encrypt page with "
419 lower_page_idx, 419 "page->index = [%ld], extent_offset = [%ld]; "
420 byte_offset_in_page, 420 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset,
421 (PAGE_CACHE_SIZE 421 rc);
422 - byte_offset_in_page)); 422 goto out;
423 if (rc) { 423 }
424 ecryptfs_printk( 424 rc = 0;
425 KERN_ERR, "Error attempting to grab, map, " 425 if (unlikely(ecryptfs_verbosity > 0)) {
426 "and prepare_write lower page with index " 426 ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
427 "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); 427 "rc = [%d]\n", (extent_base + extent_offset),
428 goto out; 428 rc);
429 } 429 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
430 } else { 430 "encryption:\n");
431 *lower_page = grab_cache_page(lower_inode->i_mapping, 431 ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
432 lower_page_idx);
433 if (!(*lower_page)) {
434 rc = -EINVAL;
435 ecryptfs_printk(
436 KERN_ERR, "Error attempting to grab and map "
437 "lower page with index [0x%.16x]; rc = [%d]\n",
438 lower_page_idx, rc);
439 goto out;
440 }
441 } 432 }
442out: 433out:
443 return rc; 434 return rc;
@@ -445,7 +436,9 @@ out:
445 436
446/** 437/**
447 * ecryptfs_encrypt_page 438 * ecryptfs_encrypt_page
448 * @ctx: The context of the page 439 * @page: Page mapped from the eCryptfs inode for the file; contains
440 * decrypted content that needs to be encrypted (to a temporary
441 * page; not in place) and written out to the lower file
449 * 442 *
450 * Encrypt an eCryptfs page. This is done on a per-extent basis. Note 443 * Encrypt an eCryptfs page. This is done on a per-extent basis. Note
451 * that eCryptfs pages may straddle the lower pages -- for instance, 444 * that eCryptfs pages may straddle the lower pages -- for instance,
@@ -455,128 +448,122 @@ out:
455 * file, 24K of page 0 of the lower file will be read and decrypted, 448 * file, 24K of page 0 of the lower file will be read and decrypted,
456 * and then 8K of page 1 of the lower file will be read and decrypted. 449 * and then 8K of page 1 of the lower file will be read and decrypted.
457 * 450 *
458 * The actual operations performed on each page depends on the
459 * contents of the ecryptfs_page_crypt_context struct.
460 *
461 * Returns zero on success; negative on error 451 * Returns zero on success; negative on error
462 */ 452 */
463int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx) 453int ecryptfs_encrypt_page(struct page *page)
464{ 454{
465 char extent_iv[ECRYPTFS_MAX_IV_BYTES]; 455 struct inode *ecryptfs_inode;
466 unsigned long base_extent;
467 unsigned long extent_offset = 0;
468 unsigned long lower_page_idx = 0;
469 unsigned long prior_lower_page_idx = 0;
470 struct page *lower_page;
471 struct inode *lower_inode;
472 struct ecryptfs_inode_info *inode_info;
473 struct ecryptfs_crypt_stat *crypt_stat; 456 struct ecryptfs_crypt_stat *crypt_stat;
457 char *enc_extent_virt = NULL;
458 struct page *enc_extent_page;
459 loff_t extent_offset;
474 int rc = 0; 460 int rc = 0;
475 int lower_byte_offset = 0; 461
476 int orig_byte_offset = 0; 462 ecryptfs_inode = page->mapping->host;
477 int num_extents_per_page; 463 crypt_stat =
478#define ECRYPTFS_PAGE_STATE_UNREAD 0 464 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
479#define ECRYPTFS_PAGE_STATE_READ 1
480#define ECRYPTFS_PAGE_STATE_MODIFIED 2
481#define ECRYPTFS_PAGE_STATE_WRITTEN 3
482 int page_state;
483
484 lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
485 inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
486 crypt_stat = &inode_info->crypt_stat;
487 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 465 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
488 rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, 466 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
489 ctx->param.lower_file); 467 0, PAGE_CACHE_SIZE);
490 if (rc) 468 if (rc)
491 ecryptfs_printk(KERN_ERR, "Error attempting to copy " 469 printk(KERN_ERR "%s: Error attempting to copy "
492 "page at index [0x%.16x]\n", 470 "page at index [%ld]\n", __FUNCTION__,
493 ctx->page->index); 471 page->index);
494 goto out; 472 goto out;
495 } 473 }
496 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; 474 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
497 base_extent = (ctx->page->index * num_extents_per_page); 475 if (!enc_extent_virt) {
498 page_state = ECRYPTFS_PAGE_STATE_UNREAD; 476 rc = -ENOMEM;
499 while (extent_offset < num_extents_per_page) { 477 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
500 ecryptfs_extent_to_lwr_pg_idx_and_offset( 478 "encrypted extent\n");
501 &lower_page_idx, &lower_byte_offset, crypt_stat, 479 goto out;
502 (base_extent + extent_offset)); 480 }
503 if (prior_lower_page_idx != lower_page_idx 481 enc_extent_page = virt_to_page(enc_extent_virt);
504 && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { 482 for (extent_offset = 0;
505 rc = ecryptfs_write_out_page(ctx, lower_page, 483 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
506 lower_inode, 484 extent_offset++) {
507 orig_byte_offset, 485 loff_t offset;
508 (PAGE_CACHE_SIZE 486
509 - orig_byte_offset)); 487 rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page,
510 if (rc) { 488 extent_offset);
511 ecryptfs_printk(KERN_ERR, "Error attempting "
512 "to write out page; rc = [%d]"
513 "\n", rc);
514 goto out;
515 }
516 page_state = ECRYPTFS_PAGE_STATE_WRITTEN;
517 }
518 if (page_state == ECRYPTFS_PAGE_STATE_UNREAD
519 || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) {
520 rc = ecryptfs_read_in_page(ctx, &lower_page,
521 lower_inode, lower_page_idx,
522 lower_byte_offset);
523 if (rc) {
524 ecryptfs_printk(KERN_ERR, "Error attempting "
525 "to read in lower page with "
526 "index [0x%.16x]; rc = [%d]\n",
527 lower_page_idx, rc);
528 goto out;
529 }
530 orig_byte_offset = lower_byte_offset;
531 prior_lower_page_idx = lower_page_idx;
532 page_state = ECRYPTFS_PAGE_STATE_READ;
533 }
534 BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED
535 || page_state == ECRYPTFS_PAGE_STATE_READ));
536 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
537 (base_extent + extent_offset));
538 if (rc) { 489 if (rc) {
539 ecryptfs_printk(KERN_ERR, "Error attempting to " 490 printk(KERN_ERR "%s: Error encrypting extent; "
540 "derive IV for extent [0x%.16x]; " 491 "rc = [%d]\n", __FUNCTION__, rc);
541 "rc = [%d]\n",
542 (base_extent + extent_offset), rc);
543 goto out; 492 goto out;
544 } 493 }
545 if (unlikely(ecryptfs_verbosity > 0)) { 494 ecryptfs_lower_offset_for_extent(
546 ecryptfs_printk(KERN_DEBUG, "Encrypting extent " 495 &offset, ((((loff_t)page->index)
547 "with iv:\n"); 496 * (PAGE_CACHE_SIZE
548 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); 497 / crypt_stat->extent_size))
549 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " 498 + extent_offset), crypt_stat);
550 "encryption:\n"); 499 rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt,
551 ecryptfs_dump_hex((char *) 500 offset, crypt_stat->extent_size);
552 (page_address(ctx->page) 501 if (rc) {
553 + (extent_offset 502 ecryptfs_printk(KERN_ERR, "Error attempting "
554 * crypt_stat->extent_size)), 8); 503 "to write lower page; rc = [%d]"
555 } 504 "\n", rc);
556 rc = ecryptfs_encrypt_page_offset( 505 goto out;
557 crypt_stat, lower_page, lower_byte_offset, ctx->page,
558 (extent_offset * crypt_stat->extent_size),
559 crypt_stat->extent_size, extent_iv);
560 ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
561 "rc = [%d]\n",
562 (base_extent + extent_offset), rc);
563 if (unlikely(ecryptfs_verbosity > 0)) {
564 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
565 "encryption:\n");
566 ecryptfs_dump_hex((char *)(page_address(lower_page)
567 + lower_byte_offset), 8);
568 } 506 }
569 page_state = ECRYPTFS_PAGE_STATE_MODIFIED;
570 extent_offset++; 507 extent_offset++;
571 } 508 }
572 BUG_ON(orig_byte_offset != 0); 509out:
573 rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, 510 kfree(enc_extent_virt);
574 (lower_byte_offset 511 return rc;
575 + crypt_stat->extent_size)); 512}
513
514static int ecryptfs_decrypt_extent(struct page *page,
515 struct ecryptfs_crypt_stat *crypt_stat,
516 struct page *enc_extent_page,
517 unsigned long extent_offset)
518{
519 loff_t extent_base;
520 char extent_iv[ECRYPTFS_MAX_IV_BYTES];
521 int rc;
522
523 extent_base = (((loff_t)page->index)
524 * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
525 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
526 (extent_base + extent_offset));
576 if (rc) { 527 if (rc) {
577 ecryptfs_printk(KERN_ERR, "Error attempting to write out " 528 ecryptfs_printk(KERN_ERR, "Error attempting to "
578 "page; rc = [%d]\n", rc); 529 "derive IV for extent [0x%.16x]; "
579 goto out; 530 "rc = [%d]\n", (extent_base + extent_offset),
531 rc);
532 goto out;
533 }
534 if (unlikely(ecryptfs_verbosity > 0)) {
535 ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
536 "with iv:\n");
537 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
538 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
539 "decryption:\n");
540 ecryptfs_dump_hex((char *)
541 (page_address(enc_extent_page)
542 + (extent_offset * crypt_stat->extent_size)),
543 8);
544 }
545 rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
546 (extent_offset
547 * crypt_stat->extent_size),
548 enc_extent_page, 0,
549 crypt_stat->extent_size, extent_iv);
550 if (rc < 0) {
551 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
552 "page->index = [%ld], extent_offset = [%ld]; "
553 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset,
554 rc);
555 goto out;
556 }
557 rc = 0;
558 if (unlikely(ecryptfs_verbosity > 0)) {
559 ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; "
560 "rc = [%d]\n", (extent_base + extent_offset),
561 rc);
562 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
563 "decryption:\n");
564 ecryptfs_dump_hex((char *)(page_address(page)
565 + (extent_offset
566 * crypt_stat->extent_size)), 8);
580 } 567 }
581out: 568out:
582 return rc; 569 return rc;
@@ -584,8 +571,9 @@ out:
584 571
585/** 572/**
586 * ecryptfs_decrypt_page 573 * ecryptfs_decrypt_page
587 * @file: The ecryptfs file 574 * @page: Page mapped from the eCryptfs inode for the file; data read
588 * @page: The page in ecryptfs to decrypt 575 * and decrypted from the lower file will be written into this
576 * page
589 * 577 *
590 * Decrypt an eCryptfs page. This is done on a per-extent basis. Note 578 * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
591 * that eCryptfs pages may straddle the lower pages -- for instance, 579 * that eCryptfs pages may straddle the lower pages -- for instance,
@@ -597,108 +585,75 @@ out:
597 * 585 *
598 * Returns zero on success; negative on error 586 * Returns zero on success; negative on error
599 */ 587 */
600int ecryptfs_decrypt_page(struct file *file, struct page *page) 588int ecryptfs_decrypt_page(struct page *page)
601{ 589{
602 char extent_iv[ECRYPTFS_MAX_IV_BYTES]; 590 struct inode *ecryptfs_inode;
603 unsigned long base_extent;
604 unsigned long extent_offset = 0;
605 unsigned long lower_page_idx = 0;
606 unsigned long prior_lower_page_idx = 0;
607 struct page *lower_page;
608 char *lower_page_virt = NULL;
609 struct inode *lower_inode;
610 struct ecryptfs_crypt_stat *crypt_stat; 591 struct ecryptfs_crypt_stat *crypt_stat;
592 char *enc_extent_virt = NULL;
593 struct page *enc_extent_page;
594 unsigned long extent_offset;
611 int rc = 0; 595 int rc = 0;
612 int byte_offset;
613 int num_extents_per_page;
614 int page_state;
615 596
616 crypt_stat = &(ecryptfs_inode_to_private( 597 ecryptfs_inode = page->mapping->host;
617 page->mapping->host)->crypt_stat); 598 crypt_stat =
618 lower_inode = ecryptfs_inode_to_lower(page->mapping->host); 599 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
619 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 600 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
620 rc = ecryptfs_do_readpage(file, page, page->index); 601 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
602 PAGE_CACHE_SIZE,
603 ecryptfs_inode);
621 if (rc) 604 if (rc)
622 ecryptfs_printk(KERN_ERR, "Error attempting to copy " 605 printk(KERN_ERR "%s: Error attempting to copy "
623 "page at index [0x%.16x]\n", 606 "page at index [%ld]\n", __FUNCTION__,
624 page->index); 607 page->index);
625 goto out; 608 goto out;
626 } 609 }
627 num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; 610 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
628 base_extent = (page->index * num_extents_per_page); 611 if (!enc_extent_virt) {
629 lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
630 GFP_KERNEL);
631 if (!lower_page_virt) {
632 rc = -ENOMEM; 612 rc = -ENOMEM;
633 ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " 613 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
634 "lower page(s)\n"); 614 "encrypted extent\n");
635 goto out; 615 goto out;
636 } 616 }
637 lower_page = virt_to_page(lower_page_virt); 617 enc_extent_page = virt_to_page(enc_extent_virt);
638 page_state = ECRYPTFS_PAGE_STATE_UNREAD; 618 for (extent_offset = 0;
639 while (extent_offset < num_extents_per_page) { 619 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
640 ecryptfs_extent_to_lwr_pg_idx_and_offset( 620 extent_offset++) {
641 &lower_page_idx, &byte_offset, crypt_stat, 621 loff_t offset;
642 (base_extent + extent_offset)); 622
643 if (prior_lower_page_idx != lower_page_idx 623 ecryptfs_lower_offset_for_extent(
644 || page_state == ECRYPTFS_PAGE_STATE_UNREAD) { 624 &offset, ((page->index * (PAGE_CACHE_SIZE
645 rc = ecryptfs_do_readpage(file, lower_page, 625 / crypt_stat->extent_size))
646 lower_page_idx); 626 + extent_offset), crypt_stat);
647 if (rc) { 627 rc = ecryptfs_read_lower(enc_extent_virt, offset,
648 ecryptfs_printk(KERN_ERR, "Error reading " 628 crypt_stat->extent_size,
649 "lower encrypted page; rc = " 629 ecryptfs_inode);
650 "[%d]\n", rc);
651 goto out;
652 }
653 prior_lower_page_idx = lower_page_idx;
654 page_state = ECRYPTFS_PAGE_STATE_READ;
655 }
656 rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
657 (base_extent + extent_offset));
658 if (rc) { 630 if (rc) {
659 ecryptfs_printk(KERN_ERR, "Error attempting to " 631 ecryptfs_printk(KERN_ERR, "Error attempting "
660 "derive IV for extent [0x%.16x]; rc = " 632 "to read lower page; rc = [%d]"
661 "[%d]\n", 633 "\n", rc);
662 (base_extent + extent_offset), rc);
663 goto out; 634 goto out;
664 } 635 }
665 if (unlikely(ecryptfs_verbosity > 0)) { 636 rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page,
666 ecryptfs_printk(KERN_DEBUG, "Decrypting extent " 637 extent_offset);
667 "with iv:\n"); 638 if (rc) {
668 ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); 639 printk(KERN_ERR "%s: Error encrypting extent; "
669 ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " 640 "rc = [%d]\n", __FUNCTION__, rc);
670 "decryption:\n");
671 ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
672 }
673 rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
674 (extent_offset
675 * crypt_stat->extent_size),
676 lower_page, byte_offset,
677 crypt_stat->extent_size,
678 extent_iv);
679 if (rc != crypt_stat->extent_size) {
680 ecryptfs_printk(KERN_ERR, "Error attempting to "
681 "decrypt extent [0x%.16x]\n",
682 (base_extent + extent_offset));
683 goto out; 641 goto out;
684 } 642 }
685 rc = 0;
686 if (unlikely(ecryptfs_verbosity > 0)) {
687 ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
688 "decryption:\n");
689 ecryptfs_dump_hex((char *)(page_address(page)
690 + byte_offset), 8);
691 }
692 extent_offset++; 643 extent_offset++;
693 } 644 }
694out: 645out:
695 if (lower_page_virt) 646 kfree(enc_extent_virt);
696 kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
697 return rc; 647 return rc;
698} 648}
699 649
700/** 650/**
701 * decrypt_scatterlist 651 * decrypt_scatterlist
652 * @crypt_stat: Cryptographic context
653 * @dest_sg: The destination scatterlist to decrypt into
654 * @src_sg: The source scatterlist to decrypt from
655 * @size: The number of bytes to decrypt
656 * @iv: The initialization vector to use for the decryption
702 * 657 *
703 * Returns the number of bytes decrypted; negative value on error 658 * Returns the number of bytes decrypted; negative value on error
704 */ 659 */
@@ -740,6 +695,13 @@ out:
740 695
741/** 696/**
742 * ecryptfs_encrypt_page_offset 697 * ecryptfs_encrypt_page_offset
698 * @crypt_stat: The cryptographic context
699 * @dst_page: The page to encrypt into
700 * @dst_offset: The offset in the page to encrypt into
701 * @src_page: The page to encrypt from
702 * @src_offset: The offset in the page to encrypt from
703 * @size: The number of bytes to encrypt
704 * @iv: The initialization vector to use for the encryption
743 * 705 *
744 * Returns the number of bytes encrypted 706 * Returns the number of bytes encrypted
745 */ 707 */
@@ -762,6 +724,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
762 724
763/** 725/**
764 * ecryptfs_decrypt_page_offset 726 * ecryptfs_decrypt_page_offset
727 * @crypt_stat: The cryptographic context
728 * @dst_page: The page to decrypt into
729 * @dst_offset: The offset in the page to decrypt into
730 * @src_page: The page to decrypt from
731 * @src_offset: The offset in the page to decrypt from
732 * @size: The number of bytes to decrypt
733 * @iv: The initialization vector to use for the decryption
765 * 734 *
766 * Returns the number of bytes decrypted 735 * Returns the number of bytes decrypted
767 */ 736 */
@@ -857,15 +826,17 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
857 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; 826 crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
858 set_extent_mask_and_shift(crypt_stat); 827 set_extent_mask_and_shift(crypt_stat);
859 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; 828 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
860 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
861 crypt_stat->header_extent_size =
862 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
863 } else
864 crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
865 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 829 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
866 crypt_stat->num_header_extents_at_front = 0; 830 crypt_stat->num_header_extents_at_front = 0;
867 else 831 else {
868 crypt_stat->num_header_extents_at_front = 1; 832 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
833 crypt_stat->num_header_extents_at_front =
834 (ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE
835 / crypt_stat->extent_size);
836 else
837 crypt_stat->num_header_extents_at_front =
838 (PAGE_CACHE_SIZE / crypt_stat->extent_size);
839 }
869} 840}
870 841
871/** 842/**
@@ -917,6 +888,8 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
917 888
918/** 889/**
919 * ecryptfs_copy_mount_wide_flags_to_inode_flags 890 * ecryptfs_copy_mount_wide_flags_to_inode_flags
891 * @crypt_stat: The inode's cryptographic context
892 * @mount_crypt_stat: The mount point's cryptographic context
920 * 893 *
921 * This function propagates the mount-wide flags to individual inode 894 * This function propagates the mount-wide flags to individual inode
922 * flags. 895 * flags.
@@ -931,9 +904,34 @@ static void ecryptfs_copy_mount_wide_flags_to_inode_flags(
931 crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED; 904 crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
932} 905}
933 906
907static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
908 struct ecryptfs_crypt_stat *crypt_stat,
909 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
910{
911 struct ecryptfs_global_auth_tok *global_auth_tok;
912 int rc = 0;
913
914 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
915 list_for_each_entry(global_auth_tok,
916 &mount_crypt_stat->global_auth_tok_list,
917 mount_crypt_stat_list) {
918 rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
919 if (rc) {
920 printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
921 mutex_unlock(
922 &mount_crypt_stat->global_auth_tok_list_mutex);
923 goto out;
924 }
925 }
926 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
927out:
928 return rc;
929}
930
934/** 931/**
935 * ecryptfs_set_default_crypt_stat_vals 932 * ecryptfs_set_default_crypt_stat_vals
936 * @crypt_stat 933 * @crypt_stat: The inode's cryptographic context
934 * @mount_crypt_stat: The mount point's cryptographic context
937 * 935 *
938 * Default values in the event that policy does not override them. 936 * Default values in the event that policy does not override them.
939 */ 937 */
@@ -953,7 +951,7 @@ static void ecryptfs_set_default_crypt_stat_vals(
953 951
954/** 952/**
955 * ecryptfs_new_file_context 953 * ecryptfs_new_file_context
956 * @ecryptfs_dentry 954 * @ecryptfs_dentry: The eCryptfs dentry
957 * 955 *
958 * If the crypto context for the file has not yet been established, 956 * If the crypto context for the file has not yet been established,
959 * this is where we do that. Establishing a new crypto context 957 * this is where we do that. Establishing a new crypto context
@@ -970,49 +968,42 @@ static void ecryptfs_set_default_crypt_stat_vals(
970 * 968 *
971 * Returns zero on success; non-zero otherwise 969 * Returns zero on success; non-zero otherwise
972 */ 970 */
973/* Associate an authentication token(s) with the file */
974int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry) 971int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
975{ 972{
976 int rc = 0;
977 struct ecryptfs_crypt_stat *crypt_stat = 973 struct ecryptfs_crypt_stat *crypt_stat =
978 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 974 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
979 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 975 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
980 &ecryptfs_superblock_to_private( 976 &ecryptfs_superblock_to_private(
981 ecryptfs_dentry->d_sb)->mount_crypt_stat; 977 ecryptfs_dentry->d_sb)->mount_crypt_stat;
982 int cipher_name_len; 978 int cipher_name_len;
979 int rc = 0;
983 980
984 ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat); 981 ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
985 /* See if there are mount crypt options */ 982 crypt_stat->flags |= (ECRYPTFS_ENCRYPTED | ECRYPTFS_KEY_VALID);
986 if (mount_crypt_stat->global_auth_tok) { 983 ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
987 ecryptfs_printk(KERN_DEBUG, "Initializing context for new " 984 mount_crypt_stat);
988 "file using mount_crypt_stat\n"); 985 rc = ecryptfs_copy_mount_wide_sigs_to_inode_sigs(crypt_stat,
989 crypt_stat->flags |= ECRYPTFS_ENCRYPTED; 986 mount_crypt_stat);
990 crypt_stat->flags |= ECRYPTFS_KEY_VALID; 987 if (rc) {
991 ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, 988 printk(KERN_ERR "Error attempting to copy mount-wide key sigs "
992 mount_crypt_stat); 989 "to the inode key sigs; rc = [%d]\n", rc);
993 memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++], 990 goto out;
994 mount_crypt_stat->global_auth_tok_sig, 991 }
995 ECRYPTFS_SIG_SIZE_HEX); 992 cipher_name_len =
996 cipher_name_len = 993 strlen(mount_crypt_stat->global_default_cipher_name);
997 strlen(mount_crypt_stat->global_default_cipher_name); 994 memcpy(crypt_stat->cipher,
998 memcpy(crypt_stat->cipher, 995 mount_crypt_stat->global_default_cipher_name,
999 mount_crypt_stat->global_default_cipher_name, 996 cipher_name_len);
1000 cipher_name_len); 997 crypt_stat->cipher[cipher_name_len] = '\0';
1001 crypt_stat->cipher[cipher_name_len] = '\0'; 998 crypt_stat->key_size =
1002 crypt_stat->key_size = 999 mount_crypt_stat->global_default_cipher_key_size;
1003 mount_crypt_stat->global_default_cipher_key_size; 1000 ecryptfs_generate_new_key(crypt_stat);
1004 ecryptfs_generate_new_key(crypt_stat);
1005 } else
1006 /* We should not encounter this scenario since we
1007 * should detect lack of global_auth_tok at mount time
1008 * TODO: Applies to 0.1 release only; remove in future
1009 * release */
1010 BUG();
1011 rc = ecryptfs_init_crypt_ctx(crypt_stat); 1001 rc = ecryptfs_init_crypt_ctx(crypt_stat);
1012 if (rc) 1002 if (rc)
1013 ecryptfs_printk(KERN_ERR, "Error initializing cryptographic " 1003 ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
1014 "context for cipher [%s]: rc = [%d]\n", 1004 "context for cipher [%s]: rc = [%d]\n",
1015 crypt_stat->cipher, rc); 1005 crypt_stat->cipher, rc);
1006out:
1016 return rc; 1007 return rc;
1017} 1008}
1018 1009
@@ -1054,7 +1045,7 @@ static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
1054 1045
1055/** 1046/**
1056 * ecryptfs_process_flags 1047 * ecryptfs_process_flags
1057 * @crypt_stat 1048 * @crypt_stat: The cryptographic context
1058 * @page_virt: Source data to be parsed 1049 * @page_virt: Source data to be parsed
1059 * @bytes_read: Updated with the number of bytes read 1050 * @bytes_read: Updated with the number of bytes read
1060 * 1051 *
@@ -1142,7 +1133,7 @@ ecryptfs_cipher_code_str_map[] = {
1142 1133
1143/** 1134/**
1144 * ecryptfs_code_for_cipher_string 1135 * ecryptfs_code_for_cipher_string
1145 * @str: The string representing the cipher name 1136 * @crypt_stat: The cryptographic context
1146 * 1137 *
1147 * Returns zero on no match, or the cipher code on match 1138 * Returns zero on no match, or the cipher code on match
1148 */ 1139 */
@@ -1198,59 +1189,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
1198 return rc; 1189 return rc;
1199} 1190}
1200 1191
1201/** 1192int ecryptfs_read_and_validate_header_region(char *data,
1202 * ecryptfs_read_header_region 1193 struct inode *ecryptfs_inode)
1203 * @data
1204 * @dentry
1205 * @nd
1206 *
1207 * Returns zero on success; non-zero otherwise
1208 */
1209static int ecryptfs_read_header_region(char *data, struct dentry *dentry,
1210 struct vfsmount *mnt)
1211{ 1194{
1212 struct file *lower_file; 1195 struct ecryptfs_crypt_stat *crypt_stat =
1213 mm_segment_t oldfs; 1196 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
1214 int rc; 1197 int rc;
1215 1198
1216 if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt, 1199 rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
1217 O_RDONLY))) { 1200 ecryptfs_inode);
1218 printk(KERN_ERR 1201 if (rc) {
1219 "Error opening lower_file to read header region\n"); 1202 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1220 goto out; 1203 __FUNCTION__, rc);
1221 }
1222 lower_file->f_pos = 0;
1223 oldfs = get_fs();
1224 set_fs(get_ds());
1225 /* For releases 0.1 and 0.2, all of the header information
1226 * fits in the first data extent-sized region. */
1227 rc = lower_file->f_op->read(lower_file, (char __user *)data,
1228 ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos);
1229 set_fs(oldfs);
1230 if ((rc = ecryptfs_close_lower_file(lower_file))) {
1231 printk(KERN_ERR "Error closing lower_file\n");
1232 goto out; 1204 goto out;
1233 } 1205 }
1234 rc = 0; 1206 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
1235out:
1236 return rc;
1237}
1238
1239int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
1240 struct vfsmount *mnt)
1241{
1242 int rc;
1243
1244 rc = ecryptfs_read_header_region(data, dentry, mnt);
1245 if (rc)
1246 goto out;
1247 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES))
1248 rc = -EINVAL; 1207 rc = -EINVAL;
1208 ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n");
1209 }
1249out: 1210out:
1250 return rc; 1211 return rc;
1251} 1212}
1252 1213
1253
1254void 1214void
1255ecryptfs_write_header_metadata(char *virt, 1215ecryptfs_write_header_metadata(char *virt,
1256 struct ecryptfs_crypt_stat *crypt_stat, 1216 struct ecryptfs_crypt_stat *crypt_stat,
@@ -1259,7 +1219,7 @@ ecryptfs_write_header_metadata(char *virt,
1259 u32 header_extent_size; 1219 u32 header_extent_size;
1260 u16 num_header_extents_at_front; 1220 u16 num_header_extents_at_front;
1261 1221
1262 header_extent_size = (u32)crypt_stat->header_extent_size; 1222 header_extent_size = (u32)crypt_stat->extent_size;
1263 num_header_extents_at_front = 1223 num_header_extents_at_front =
1264 (u16)crypt_stat->num_header_extents_at_front; 1224 (u16)crypt_stat->num_header_extents_at_front;
1265 header_extent_size = cpu_to_be32(header_extent_size); 1225 header_extent_size = cpu_to_be32(header_extent_size);
@@ -1276,9 +1236,10 @@ struct kmem_cache *ecryptfs_header_cache_2;
1276 1236
1277/** 1237/**
1278 * ecryptfs_write_headers_virt 1238 * ecryptfs_write_headers_virt
1279 * @page_virt 1239 * @page_virt: The virtual address to write the headers to
1280 * @crypt_stat 1240 * @size: Set to the number of bytes written by this function
1281 * @ecryptfs_dentry 1241 * @crypt_stat: The cryptographic context
1242 * @ecryptfs_dentry: The eCryptfs dentry
1282 * 1243 *
1283 * Format version: 1 1244 * Format version: 1
1284 * 1245 *
@@ -1332,53 +1293,50 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
1332 return rc; 1293 return rc;
1333} 1294}
1334 1295
1335static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, 1296static int
1336 struct file *lower_file, 1297ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1337 char *page_virt) 1298 struct dentry *ecryptfs_dentry,
1299 char *page_virt)
1338{ 1300{
1339 mm_segment_t oldfs;
1340 int current_header_page; 1301 int current_header_page;
1341 int header_pages; 1302 int header_pages;
1342 ssize_t size; 1303 int rc;
1343 int rc = 0;
1344 1304
1345 lower_file->f_pos = 0; 1305 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt,
1346 oldfs = get_fs(); 1306 0, PAGE_CACHE_SIZE);
1347 set_fs(get_ds()); 1307 if (rc) {
1348 size = vfs_write(lower_file, (char __user *)page_virt, PAGE_CACHE_SIZE, 1308 printk(KERN_ERR "%s: Error attempting to write header "
1349 &lower_file->f_pos); 1309 "information to lower file; rc = [%d]\n", __FUNCTION__,
1350 if (size < 0) { 1310 rc);
1351 rc = (int)size;
1352 printk(KERN_ERR "Error attempting to write lower page; "
1353 "rc = [%d]\n", rc);
1354 set_fs(oldfs);
1355 goto out; 1311 goto out;
1356 } 1312 }
1357 header_pages = ((crypt_stat->header_extent_size 1313 header_pages = ((crypt_stat->extent_size
1358 * crypt_stat->num_header_extents_at_front) 1314 * crypt_stat->num_header_extents_at_front)
1359 / PAGE_CACHE_SIZE); 1315 / PAGE_CACHE_SIZE);
1360 memset(page_virt, 0, PAGE_CACHE_SIZE); 1316 memset(page_virt, 0, PAGE_CACHE_SIZE);
1361 current_header_page = 1; 1317 current_header_page = 1;
1362 while (current_header_page < header_pages) { 1318 while (current_header_page < header_pages) {
1363 size = vfs_write(lower_file, (char __user *)page_virt, 1319 loff_t offset;
1364 PAGE_CACHE_SIZE, &lower_file->f_pos); 1320
1365 if (size < 0) { 1321 offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT);
1366 rc = (int)size; 1322 if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode,
1367 printk(KERN_ERR "Error attempting to write lower page; " 1323 page_virt, offset,
1368 "rc = [%d]\n", rc); 1324 PAGE_CACHE_SIZE))) {
1369 set_fs(oldfs); 1325 printk(KERN_ERR "%s: Error attempting to write header "
1326 "information to lower file; rc = [%d]\n",
1327 __FUNCTION__, rc);
1370 goto out; 1328 goto out;
1371 } 1329 }
1372 current_header_page++; 1330 current_header_page++;
1373 } 1331 }
1374 set_fs(oldfs);
1375out: 1332out:
1376 return rc; 1333 return rc;
1377} 1334}
1378 1335
1379static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, 1336static int
1380 struct ecryptfs_crypt_stat *crypt_stat, 1337ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1381 char *page_virt, size_t size) 1338 struct ecryptfs_crypt_stat *crypt_stat,
1339 char *page_virt, size_t size)
1382{ 1340{
1383 int rc; 1341 int rc;
1384 1342
@@ -1389,7 +1347,7 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1389 1347
1390/** 1348/**
1391 * ecryptfs_write_metadata 1349 * ecryptfs_write_metadata
1392 * @lower_file: The lower file struct, which was returned from dentry_open 1350 * @ecryptfs_dentry: The eCryptfs dentry
1393 * 1351 *
1394 * Write the file headers out. This will likely involve a userspace 1352 * Write the file headers out. This will likely involve a userspace
1395 * callout, in which the session key is encrypted with one or more 1353 * callout, in which the session key is encrypted with one or more
@@ -1397,22 +1355,21 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1397 * retrieved via a prompt. Exactly what happens at this point should 1355 * retrieved via a prompt. Exactly what happens at this point should
1398 * be policy-dependent. 1356 * be policy-dependent.
1399 * 1357 *
1358 * TODO: Support header information spanning multiple pages
1359 *
1400 * Returns zero on success; non-zero on error 1360 * Returns zero on success; non-zero on error
1401 */ 1361 */
1402int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, 1362int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1403 struct file *lower_file)
1404{ 1363{
1405 struct ecryptfs_crypt_stat *crypt_stat; 1364 struct ecryptfs_crypt_stat *crypt_stat =
1365 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
1406 char *page_virt; 1366 char *page_virt;
1407 size_t size; 1367 size_t size = 0;
1408 int rc = 0; 1368 int rc = 0;
1409 1369
1410 crypt_stat = &ecryptfs_inode_to_private(
1411 ecryptfs_dentry->d_inode)->crypt_stat;
1412 if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 1370 if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
1413 if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { 1371 if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
1414 ecryptfs_printk(KERN_DEBUG, "Key is " 1372 printk(KERN_ERR "Key is invalid; bailing out\n");
1415 "invalid; bailing out\n");
1416 rc = -EINVAL; 1373 rc = -EINVAL;
1417 goto out; 1374 goto out;
1418 } 1375 }
@@ -1441,7 +1398,8 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
1441 crypt_stat, page_virt, 1398 crypt_stat, page_virt,
1442 size); 1399 size);
1443 else 1400 else
1444 rc = ecryptfs_write_metadata_to_contents(crypt_stat, lower_file, 1401 rc = ecryptfs_write_metadata_to_contents(crypt_stat,
1402 ecryptfs_dentry,
1445 page_virt); 1403 page_virt);
1446 if (rc) { 1404 if (rc) {
1447 printk(KERN_ERR "Error writing metadata out to lower file; " 1405 printk(KERN_ERR "Error writing metadata out to lower file; "
@@ -1464,28 +1422,28 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1464 u32 header_extent_size; 1422 u32 header_extent_size;
1465 u16 num_header_extents_at_front; 1423 u16 num_header_extents_at_front;
1466 1424
1467 memcpy(&header_extent_size, virt, 4); 1425 memcpy(&header_extent_size, virt, sizeof(u32));
1468 header_extent_size = be32_to_cpu(header_extent_size); 1426 header_extent_size = be32_to_cpu(header_extent_size);
1469 virt += 4; 1427 virt += sizeof(u32);
1470 memcpy(&num_header_extents_at_front, virt, 2); 1428 memcpy(&num_header_extents_at_front, virt, sizeof(u16));
1471 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); 1429 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1472 crypt_stat->header_extent_size = (int)header_extent_size;
1473 crypt_stat->num_header_extents_at_front = 1430 crypt_stat->num_header_extents_at_front =
1474 (int)num_header_extents_at_front; 1431 (int)num_header_extents_at_front;
1475 (*bytes_read) = 6; 1432 (*bytes_read) = (sizeof(u32) + sizeof(u16));
1476 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1433 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1477 && ((crypt_stat->header_extent_size 1434 && ((crypt_stat->extent_size
1478 * crypt_stat->num_header_extents_at_front) 1435 * crypt_stat->num_header_extents_at_front)
1479 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1436 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
1480 rc = -EINVAL; 1437 rc = -EINVAL;
1481 ecryptfs_printk(KERN_WARNING, "Invalid header extent size: " 1438 printk(KERN_WARNING "Invalid number of header extents: [%zd]\n",
1482 "[%d]\n", crypt_stat->header_extent_size); 1439 crypt_stat->num_header_extents_at_front);
1483 } 1440 }
1484 return rc; 1441 return rc;
1485} 1442}
1486 1443
1487/** 1444/**
1488 * set_default_header_data 1445 * set_default_header_data
1446 * @crypt_stat: The cryptographic context
1489 * 1447 *
1490 * For version 0 file format; this function is only for backwards 1448 * For version 0 file format; this function is only for backwards
1491 * compatibility for files created with the prior versions of 1449 * compatibility for files created with the prior versions of
@@ -1493,12 +1451,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1493 */ 1451 */
1494static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) 1452static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1495{ 1453{
1496 crypt_stat->header_extent_size = 4096; 1454 crypt_stat->num_header_extents_at_front = 2;
1497 crypt_stat->num_header_extents_at_front = 1;
1498} 1455}
1499 1456
1500/** 1457/**
1501 * ecryptfs_read_headers_virt 1458 * ecryptfs_read_headers_virt
1459 * @page_virt: The virtual address into which to read the headers
1460 * @crypt_stat: The cryptographic context
1461 * @ecryptfs_dentry: The eCryptfs dentry
1462 * @validate_header_size: Whether to validate the header size while reading
1502 * 1463 *
1503 * Read/parse the header data. The header format is detailed in the 1464 * Read/parse the header data. The header format is detailed in the
1504 * comment block for the ecryptfs_write_headers_virt() function. 1465 * comment block for the ecryptfs_write_headers_virt() function.
@@ -1558,19 +1519,25 @@ out:
1558 1519
1559/** 1520/**
1560 * ecryptfs_read_xattr_region 1521 * ecryptfs_read_xattr_region
1522 * @page_virt: The virtual address into which to read the xattr data
1523 * @ecryptfs_inode: The eCryptfs inode
1561 * 1524 *
1562 * Attempts to read the crypto metadata from the extended attribute 1525 * Attempts to read the crypto metadata from the extended attribute
1563 * region of the lower file. 1526 * region of the lower file.
1527 *
1528 * Returns zero on success; non-zero on error
1564 */ 1529 */
1565int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry) 1530int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode)
1566{ 1531{
1532 struct dentry *lower_dentry =
1533 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
1567 ssize_t size; 1534 ssize_t size;
1568 int rc = 0; 1535 int rc = 0;
1569 1536
1570 size = ecryptfs_getxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME, 1537 size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME,
1571 page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); 1538 page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE);
1572 if (size < 0) { 1539 if (size < 0) {
1573 printk(KERN_DEBUG "Error attempting to read the [%s] " 1540 printk(KERN_ERR "Error attempting to read the [%s] "
1574 "xattr from the lower file; return value = [%zd]\n", 1541 "xattr from the lower file; return value = [%zd]\n",
1575 ECRYPTFS_XATTR_NAME, size); 1542 ECRYPTFS_XATTR_NAME, size);
1576 rc = -EINVAL; 1543 rc = -EINVAL;
@@ -1585,7 +1552,7 @@ int ecryptfs_read_and_validate_xattr_region(char *page_virt,
1585{ 1552{
1586 int rc; 1553 int rc;
1587 1554
1588 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry); 1555 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode);
1589 if (rc) 1556 if (rc)
1590 goto out; 1557 goto out;
1591 if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { 1558 if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1609,15 +1576,13 @@ out:
1609 * 1576 *
1610 * Returns zero if valid headers found and parsed; non-zero otherwise 1577 * Returns zero if valid headers found and parsed; non-zero otherwise
1611 */ 1578 */
1612int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry, 1579int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1613 struct file *lower_file)
1614{ 1580{
1615 int rc = 0; 1581 int rc = 0;
1616 char *page_virt = NULL; 1582 char *page_virt = NULL;
1617 mm_segment_t oldfs; 1583 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
1618 ssize_t bytes_read;
1619 struct ecryptfs_crypt_stat *crypt_stat = 1584 struct ecryptfs_crypt_stat *crypt_stat =
1620 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 1585 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
1621 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 1586 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1622 &ecryptfs_superblock_to_private( 1587 &ecryptfs_superblock_to_private(
1623 ecryptfs_dentry->d_sb)->mount_crypt_stat; 1588 ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -1628,27 +1593,18 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
1628 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); 1593 page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
1629 if (!page_virt) { 1594 if (!page_virt) {
1630 rc = -ENOMEM; 1595 rc = -ENOMEM;
1631 ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n"); 1596 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1597 __FUNCTION__);
1632 goto out; 1598 goto out;
1633 } 1599 }
1634 lower_file->f_pos = 0; 1600 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
1635 oldfs = get_fs(); 1601 ecryptfs_inode);
1636 set_fs(get_ds()); 1602 if (!rc)
1637 bytes_read = lower_file->f_op->read(lower_file, 1603 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1638 (char __user *)page_virt, 1604 ecryptfs_dentry,
1639 ECRYPTFS_DEFAULT_EXTENT_SIZE, 1605 ECRYPTFS_VALIDATE_HEADER_SIZE);
1640 &lower_file->f_pos);
1641 set_fs(oldfs);
1642 if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
1643 rc = -EINVAL;
1644 goto out;
1645 }
1646 rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
1647 ecryptfs_dentry,
1648 ECRYPTFS_VALIDATE_HEADER_SIZE);
1649 if (rc) { 1606 if (rc) {
1650 rc = ecryptfs_read_xattr_region(page_virt, 1607 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
1651 ecryptfs_dentry);
1652 if (rc) { 1608 if (rc) {
1653 printk(KERN_DEBUG "Valid eCryptfs headers not found in " 1609 printk(KERN_DEBUG "Valid eCryptfs headers not found in "
1654 "file header region or xattr region\n"); 1610 "file header region or xattr region\n");
@@ -1776,7 +1732,7 @@ out:
1776} 1732}
1777 1733
1778/** 1734/**
1779 * ecryptfs_process_cipher - Perform cipher initialization. 1735 * ecryptfs_process_key_cipher - Perform key cipher initialization.
1780 * @key_tfm: Crypto context for key material, set by this function 1736 * @key_tfm: Crypto context for key material, set by this function
1781 * @cipher_name: Name of the cipher 1737 * @cipher_name: Name of the cipher
1782 * @key_size: Size of the key in bytes 1738 * @key_size: Size of the key in bytes
@@ -1785,9 +1741,9 @@ out:
1785 * should be released by other functions, such as on a superblock put 1741 * should be released by other functions, such as on a superblock put
1786 * event, regardless of whether this function succeeds or fails. 1742 * event, regardless of whether this function succeeds or fails.
1787 */ 1743 */
1788int 1744static int
1789ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, 1745ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1790 size_t *key_size) 1746 char *cipher_name, size_t *key_size)
1791{ 1747{
1792 char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; 1748 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1793 char *full_alg_name; 1749 char *full_alg_name;
@@ -1829,3 +1785,100 @@ ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
1829out: 1785out:
1830 return rc; 1786 return rc;
1831} 1787}
1788
1789struct kmem_cache *ecryptfs_key_tfm_cache;
1790struct list_head key_tfm_list;
1791struct mutex key_tfm_list_mutex;
1792
1793int ecryptfs_init_crypto(void)
1794{
1795 mutex_init(&key_tfm_list_mutex);
1796 INIT_LIST_HEAD(&key_tfm_list);
1797 return 0;
1798}
1799
1800int ecryptfs_destroy_crypto(void)
1801{
1802 struct ecryptfs_key_tfm *key_tfm, *key_tfm_tmp;
1803
1804 mutex_lock(&key_tfm_list_mutex);
1805 list_for_each_entry_safe(key_tfm, key_tfm_tmp, &key_tfm_list,
1806 key_tfm_list) {
1807 list_del(&key_tfm->key_tfm_list);
1808 if (key_tfm->key_tfm)
1809 crypto_free_blkcipher(key_tfm->key_tfm);
1810 kmem_cache_free(ecryptfs_key_tfm_cache, key_tfm);
1811 }
1812 mutex_unlock(&key_tfm_list_mutex);
1813 return 0;
1814}
1815
1816int
1817ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
1818 size_t key_size)
1819{
1820 struct ecryptfs_key_tfm *tmp_tfm;
1821 int rc = 0;
1822
1823 tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL);
1824 if (key_tfm != NULL)
1825 (*key_tfm) = tmp_tfm;
1826 if (!tmp_tfm) {
1827 rc = -ENOMEM;
1828 printk(KERN_ERR "Error attempting to allocate from "
1829 "ecryptfs_key_tfm_cache\n");
1830 goto out;
1831 }
1832 mutex_init(&tmp_tfm->key_tfm_mutex);
1833 strncpy(tmp_tfm->cipher_name, cipher_name,
1834 ECRYPTFS_MAX_CIPHER_NAME_SIZE);
1835 tmp_tfm->key_size = key_size;
1836 rc = ecryptfs_process_key_cipher(&tmp_tfm->key_tfm,
1837 tmp_tfm->cipher_name,
1838 &tmp_tfm->key_size);
1839 if (rc) {
1840 printk(KERN_ERR "Error attempting to initialize key TFM "
1841 "cipher with name = [%s]; rc = [%d]\n",
1842 tmp_tfm->cipher_name, rc);
1843 kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm);
1844 if (key_tfm != NULL)
1845 (*key_tfm) = NULL;
1846 goto out;
1847 }
1848 mutex_lock(&key_tfm_list_mutex);
1849 list_add(&tmp_tfm->key_tfm_list, &key_tfm_list);
1850 mutex_unlock(&key_tfm_list_mutex);
1851out:
1852 return rc;
1853}
1854
1855int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
1856 struct mutex **tfm_mutex,
1857 char *cipher_name)
1858{
1859 struct ecryptfs_key_tfm *key_tfm;
1860 int rc = 0;
1861
1862 (*tfm) = NULL;
1863 (*tfm_mutex) = NULL;
1864 mutex_lock(&key_tfm_list_mutex);
1865 list_for_each_entry(key_tfm, &key_tfm_list, key_tfm_list) {
1866 if (strcmp(key_tfm->cipher_name, cipher_name) == 0) {
1867 (*tfm) = key_tfm->key_tfm;
1868 (*tfm_mutex) = &key_tfm->key_tfm_mutex;
1869 mutex_unlock(&key_tfm_list_mutex);
1870 goto out;
1871 }
1872 }
1873 mutex_unlock(&key_tfm_list_mutex);
1874 rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0);
1875 if (rc) {
1876 printk(KERN_ERR "Error adding new key_tfm to list; rc = [%d]\n",
1877 rc);
1878 goto out;
1879 }
1880 (*tfm) = key_tfm->key_tfm;
1881 (*tfm_mutex) = &key_tfm->key_tfm_mutex;
1882out:
1883 return rc;
1884}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 434c7efd80f8..3d2bdf546ec6 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -38,8 +38,6 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
38 auth_tok); 38 auth_tok);
39 if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) { 39 if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) {
40 ecryptfs_printk(KERN_DEBUG, " * private key type\n"); 40 ecryptfs_printk(KERN_DEBUG, " * private key type\n");
41 ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
42 "IN ECRYPTFS VERSION 0.1)\n");
43 } else { 41 } else {
44 ecryptfs_printk(KERN_DEBUG, " * passphrase type\n"); 42 ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
45 ecryptfs_to_hex(salt, auth_tok->token.password.salt, 43 ecryptfs_to_hex(salt, auth_tok->token.password.salt,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 1b9dd9a96f19..ce7a5d4aec36 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -38,7 +38,7 @@
38/* Version verification for shared data structures w/ userspace */ 38/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 39#define ECRYPTFS_VERSION_MAJOR 0x00
40#define ECRYPTFS_VERSION_MINOR 0x04 40#define ECRYPTFS_VERSION_MINOR 0x04
41#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x02 41#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
42/* These flags indicate which features are supported by the kernel 42/* These flags indicate which features are supported by the kernel
43 * module; userspace tools such as the mount helper read 43 * module; userspace tools such as the mount helper read
44 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine 44 * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
@@ -48,10 +48,12 @@
48#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 48#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 49#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 50#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
51#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
52 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
53 | ECRYPTFS_VERSIONING_PUBKEY \ 54 | ECRYPTFS_VERSIONING_PUBKEY \
54 | ECRYPTFS_VERSIONING_XATTR) 55 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY)
55#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
56#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
57#define ECRYPTFS_SALT_SIZE 8 59#define ECRYPTFS_SALT_SIZE 8
@@ -65,8 +67,7 @@
65#define ECRYPTFS_MAX_KEY_BYTES 64 67#define ECRYPTFS_MAX_KEY_BYTES 64
66#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 68#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
67#define ECRYPTFS_DEFAULT_IV_BYTES 16 69#define ECRYPTFS_DEFAULT_IV_BYTES 16
68#define ECRYPTFS_FILE_VERSION 0x02 70#define ECRYPTFS_FILE_VERSION 0x03
69#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
70#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 71#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
71#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 72#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
72#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
@@ -144,6 +145,7 @@ struct ecryptfs_private_key {
144struct ecryptfs_auth_tok { 145struct ecryptfs_auth_tok {
145 u16 version; /* 8-bit major and 8-bit minor */ 146 u16 version; /* 8-bit major and 8-bit minor */
146 u16 token_type; 147 u16 token_type;
148#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
147 u32 flags; 149 u32 flags;
148 struct ecryptfs_session_key session_key; 150 struct ecryptfs_session_key session_key;
149 u8 reserved[32]; 151 u8 reserved[32];
@@ -194,12 +196,11 @@ ecryptfs_get_key_payload_data(struct key *key)
194#define ECRYPTFS_MAX_KEYSET_SIZE 1024 196#define ECRYPTFS_MAX_KEYSET_SIZE 1024
195#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 197#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
196#define ECRYPTFS_MAX_NUM_ENC_KEYS 64 198#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
197#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
198#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */ 199#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
199#define ECRYPTFS_SALT_BYTES 2 200#define ECRYPTFS_SALT_BYTES 2
200#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 201#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
201#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ 202#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
202#define ECRYPTFS_FILE_SIZE_BYTES 8 203#define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64))
203#define ECRYPTFS_DEFAULT_CIPHER "aes" 204#define ECRYPTFS_DEFAULT_CIPHER "aes"
204#define ECRYPTFS_DEFAULT_KEY_BYTES 16 205#define ECRYPTFS_DEFAULT_KEY_BYTES 16
205#define ECRYPTFS_DEFAULT_HASH "md5" 206#define ECRYPTFS_DEFAULT_HASH "md5"
@@ -212,6 +213,11 @@ ecryptfs_get_key_payload_data(struct key *key)
212#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43 213#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
213#define MD5_DIGEST_SIZE 16 214#define MD5_DIGEST_SIZE 16
214 215
216struct ecryptfs_key_sig {
217 struct list_head crypt_stat_list;
218 char keysig[ECRYPTFS_SIG_SIZE_HEX];
219};
220
215/** 221/**
216 * This is the primary struct associated with each encrypted file. 222 * This is the primary struct associated with each encrypted file.
217 * 223 *
@@ -231,8 +237,6 @@ struct ecryptfs_crypt_stat {
231 u32 flags; 237 u32 flags;
232 unsigned int file_version; 238 unsigned int file_version;
233 size_t iv_bytes; 239 size_t iv_bytes;
234 size_t num_keysigs;
235 size_t header_extent_size;
236 size_t num_header_extents_at_front; 240 size_t num_header_extents_at_front;
237 size_t extent_size; /* Data extent size; default is 4096 */ 241 size_t extent_size; /* Data extent size; default is 4096 */
238 size_t key_size; 242 size_t key_size;
@@ -245,7 +249,8 @@ struct ecryptfs_crypt_stat {
245 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; 249 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
246 unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; 250 unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
247 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; 251 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
248 unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX]; 252 struct list_head keysig_list;
253 struct mutex keysig_list_mutex;
249 struct mutex cs_tfm_mutex; 254 struct mutex cs_tfm_mutex;
250 struct mutex cs_hash_tfm_mutex; 255 struct mutex cs_hash_tfm_mutex;
251 struct mutex cs_mutex; 256 struct mutex cs_mutex;
@@ -255,6 +260,8 @@ struct ecryptfs_crypt_stat {
255struct ecryptfs_inode_info { 260struct ecryptfs_inode_info {
256 struct inode vfs_inode; 261 struct inode vfs_inode;
257 struct inode *wii_inode; 262 struct inode *wii_inode;
263 struct file *lower_file;
264 struct mutex lower_file_mutex;
258 struct ecryptfs_crypt_stat crypt_stat; 265 struct ecryptfs_crypt_stat crypt_stat;
259}; 266};
260 267
@@ -266,6 +273,59 @@ struct ecryptfs_dentry_info {
266}; 273};
267 274
268/** 275/**
276 * ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint
277 * @flags: Status flags
278 * @mount_crypt_stat_list: These auth_toks hang off the mount-wide
279 * cryptographic context. Every time a new
280 * inode comes into existence, eCryptfs copies
281 * the auth_toks on that list to the set of
282 * auth_toks on the inode's crypt_stat
283 * @global_auth_tok_key: The key from the user's keyring for the sig
284 * @global_auth_tok: The key contents
285 * @sig: The key identifier
286 *
287 * ecryptfs_global_auth_tok structs refer to authentication token keys
288 * in the user keyring that apply to newly created files. A list of
289 * these objects hangs off of the mount_crypt_stat struct for any
290 * given eCryptfs mount. This struct maintains a reference to both the
291 * key contents and the key itself so that the key can be put on
292 * unmount.
293 */
294struct ecryptfs_global_auth_tok {
295#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001
296 u32 flags;
297 struct list_head mount_crypt_stat_list;
298 struct key *global_auth_tok_key;
299 struct ecryptfs_auth_tok *global_auth_tok;
300 unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
301};
302
303/**
304 * ecryptfs_key_tfm - Persistent key tfm
305 * @key_tfm: crypto API handle to the key
306 * @key_size: Key size in bytes
307 * @key_tfm_mutex: Mutex to ensure only one operation in eCryptfs is
308 * using the persistent TFM at any point in time
309 * @key_tfm_list: Handle to hang this off the module-wide TFM list
310 * @cipher_name: String name for the cipher for this TFM
311 *
312 * Typically, eCryptfs will use the same ciphers repeatedly throughout
313 * the course of its operations. In order to avoid unnecessarily
314 * destroying and initializing the same cipher repeatedly, eCryptfs
315 * keeps a list of crypto API contexts around to use when needed.
316 */
317struct ecryptfs_key_tfm {
318 struct crypto_blkcipher *key_tfm;
319 size_t key_size;
320 struct mutex key_tfm_mutex;
321 struct list_head key_tfm_list;
322 unsigned char cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
323};
324
325extern struct list_head key_tfm_list;
326extern struct mutex key_tfm_list_mutex;
327
328/**
269 * This struct is to enable a mount-wide passphrase/salt combo. This 329 * This struct is to enable a mount-wide passphrase/salt combo. This
270 * is more or less a stopgap to provide similar functionality to other 330 * is more or less a stopgap to provide similar functionality to other
271 * crypto filesystems like EncFS or CFS until full policy support is 331 * crypto filesystems like EncFS or CFS until full policy support is
@@ -276,15 +336,14 @@ struct ecryptfs_mount_crypt_stat {
276#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001 336#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
277#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002 337#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002
278#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004 338#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004
339#define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED 0x00000008
279 u32 flags; 340 u32 flags;
280 struct ecryptfs_auth_tok *global_auth_tok; 341 struct list_head global_auth_tok_list;
281 struct key *global_auth_tok_key; 342 struct mutex global_auth_tok_list_mutex;
343 size_t num_global_auth_toks;
282 size_t global_default_cipher_key_size; 344 size_t global_default_cipher_key_size;
283 struct crypto_blkcipher *global_key_tfm;
284 struct mutex global_key_tfm_mutex;
285 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE 345 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
286 + 1]; 346 + 1];
287 unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
288}; 347};
289 348
290/* superblock private data. */ 349/* superblock private data. */
@@ -468,6 +527,9 @@ extern struct kmem_cache *ecryptfs_header_cache_2;
468extern struct kmem_cache *ecryptfs_xattr_cache; 527extern struct kmem_cache *ecryptfs_xattr_cache;
469extern struct kmem_cache *ecryptfs_lower_page_cache; 528extern struct kmem_cache *ecryptfs_lower_page_cache;
470extern struct kmem_cache *ecryptfs_key_record_cache; 529extern struct kmem_cache *ecryptfs_key_record_cache;
530extern struct kmem_cache *ecryptfs_key_sig_cache;
531extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
532extern struct kmem_cache *ecryptfs_key_tfm_cache;
471 533
472int ecryptfs_interpose(struct dentry *hidden_dentry, 534int ecryptfs_interpose(struct dentry *hidden_dentry,
473 struct dentry *this_dentry, struct super_block *sb, 535 struct dentry *this_dentry, struct super_block *sb,
@@ -486,44 +548,18 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
486int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); 548int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
487void ecryptfs_rotate_iv(unsigned char *iv); 549void ecryptfs_rotate_iv(unsigned char *iv);
488void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); 550void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
489void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); 551void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
490void ecryptfs_destruct_mount_crypt_stat( 552void ecryptfs_destroy_mount_crypt_stat(
491 struct ecryptfs_mount_crypt_stat *mount_crypt_stat); 553 struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
492int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); 554int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
493int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, 555int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode);
494 char *cipher_name, 556int ecryptfs_encrypt_page(struct page *page);
495 char *chaining_modifier); 557int ecryptfs_decrypt_page(struct page *page);
496#define ECRYPTFS_LOWER_I_MUTEX_NOT_HELD 0 558int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
497#define ECRYPTFS_LOWER_I_MUTEX_HELD 1 559int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
498int ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
499 struct inode *lower_inode,
500 struct inode *inode,
501 struct dentry *ecryptfs_dentry,
502 int lower_i_mutex_held);
503int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
504 struct file *lower_file,
505 unsigned long lower_page_index, int byte_offset,
506 int region_bytes);
507int
508ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
509 struct file *lower_file, int byte_offset,
510 int region_size);
511int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
512 struct file *lower_file);
513int ecryptfs_do_readpage(struct file *file, struct page *page,
514 pgoff_t lower_page_index);
515int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
516 struct inode *lower_inode,
517 struct writeback_control *wbc);
518int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
519int ecryptfs_decrypt_page(struct file *file, struct page *page);
520int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
521 struct file *lower_file);
522int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
523 struct file *lower_file);
524int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); 560int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
525int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry, 561int ecryptfs_read_and_validate_header_region(char *data,
526 struct vfsmount *mnt); 562 struct inode *ecryptfs_inode);
527int ecryptfs_read_and_validate_xattr_region(char *page_virt, 563int ecryptfs_read_and_validate_xattr_region(char *page_virt,
528 struct dentry *ecryptfs_dentry); 564 struct dentry *ecryptfs_dentry);
529u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); 565u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
@@ -533,27 +569,22 @@ int ecryptfs_generate_key_packet_set(char *dest_base,
533 struct ecryptfs_crypt_stat *crypt_stat, 569 struct ecryptfs_crypt_stat *crypt_stat,
534 struct dentry *ecryptfs_dentry, 570 struct dentry *ecryptfs_dentry,
535 size_t *len, size_t max); 571 size_t *len, size_t max);
536int process_request_key_err(long err_code);
537int 572int
538ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, 573ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
539 unsigned char *src, struct dentry *ecryptfs_dentry); 574 unsigned char *src, struct dentry *ecryptfs_dentry);
540int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); 575int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
541int
542ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
543 size_t *key_size);
544int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); 576int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
545int ecryptfs_inode_set(struct inode *inode, void *lower_inode); 577int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
546void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); 578void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
547int ecryptfs_open_lower_file(struct file **lower_file,
548 struct dentry *lower_dentry,
549 struct vfsmount *lower_mnt, int flags);
550int ecryptfs_close_lower_file(struct file *lower_file);
551ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, 579ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
552 size_t size); 580 size_t size);
581ssize_t
582ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
583 void *value, size_t size);
553int 584int
554ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 585ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
555 size_t size, int flags); 586 size_t size, int flags);
556int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); 587int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
557int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 588int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid);
558int ecryptfs_process_quit(uid_t uid, pid_t pid); 589int ecryptfs_process_quit(uid_t uid, pid_t pid);
559int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 590int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
@@ -580,7 +611,43 @@ void
580ecryptfs_write_header_metadata(char *virt, 611ecryptfs_write_header_metadata(char *virt,
581 struct ecryptfs_crypt_stat *crypt_stat, 612 struct ecryptfs_crypt_stat *crypt_stat,
582 size_t *written); 613 size_t *written);
614int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig);
615int
616ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
617 char *sig);
618int ecryptfs_get_global_auth_tok_for_sig(
619 struct ecryptfs_global_auth_tok **global_auth_tok,
620 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig);
621int
622ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
623 size_t key_size);
624int ecryptfs_init_crypto(void);
625int ecryptfs_destroy_crypto(void);
626int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
627 struct mutex **tfm_mutex,
628 char *cipher_name);
629int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
630 struct ecryptfs_auth_tok **auth_tok,
631 char *sig);
583int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, 632int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
584 int num_zeros); 633 int num_zeros);
634void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
635 struct ecryptfs_crypt_stat *crypt_stat);
636int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
637 loff_t offset, size_t size);
638int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
639 struct page *page_for_lower,
640 size_t offset_in_page, size_t size);
641int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
642 size_t size);
643int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
644 struct inode *ecryptfs_inode);
645int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
646 pgoff_t page_index,
647 size_t offset_in_page, size_t size,
648 struct inode *ecryptfs_inode);
649int ecryptfs_read(char *data, loff_t offset, size_t size,
650 struct file *ecryptfs_file);
651struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
585 652
586#endif /* #ifndef ECRYPTFS_KERNEL_H */ 653#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 94f456fe4d9b..c98c4690a771 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -141,34 +141,6 @@ retry:
141 141
142struct kmem_cache *ecryptfs_file_info_cache; 142struct kmem_cache *ecryptfs_file_info_cache;
143 143
144int ecryptfs_open_lower_file(struct file **lower_file,
145 struct dentry *lower_dentry,
146 struct vfsmount *lower_mnt, int flags)
147{
148 int rc = 0;
149
150 flags |= O_LARGEFILE;
151 dget(lower_dentry);
152 mntget(lower_mnt);
153 *lower_file = dentry_open(lower_dentry, lower_mnt, flags);
154 if (IS_ERR(*lower_file)) {
155 printk(KERN_ERR "Error opening lower file for lower_dentry "
156 "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n",
157 lower_dentry, lower_mnt, flags);
158 rc = PTR_ERR(*lower_file);
159 *lower_file = NULL;
160 goto out;
161 }
162out:
163 return rc;
164}
165
166int ecryptfs_close_lower_file(struct file *lower_file)
167{
168 fput(lower_file);
169 return 0;
170}
171
172/** 144/**
173 * ecryptfs_open 145 * ecryptfs_open
174 * @inode: inode speciying file to open 146 * @inode: inode speciying file to open
@@ -187,11 +159,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
187 /* Private value of ecryptfs_dentry allocated in 159 /* Private value of ecryptfs_dentry allocated in
188 * ecryptfs_lookup() */ 160 * ecryptfs_lookup() */
189 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
190 struct inode *lower_inode = NULL;
191 struct file *lower_file = NULL;
192 struct vfsmount *lower_mnt;
193 struct ecryptfs_file_info *file_info; 162 struct ecryptfs_file_info *file_info;
194 int lower_flags;
195 163
196 mount_crypt_stat = &ecryptfs_superblock_to_private( 164 mount_crypt_stat = &ecryptfs_superblock_to_private(
197 ecryptfs_dentry->d_sb)->mount_crypt_stat; 165 ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -219,25 +187,12 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
219 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { 187 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
220 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); 188 ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
221 /* Policy code enabled in future release */ 189 /* Policy code enabled in future release */
222 crypt_stat->flags |= ECRYPTFS_POLICY_APPLIED; 190 crypt_stat->flags |= (ECRYPTFS_POLICY_APPLIED
223 crypt_stat->flags |= ECRYPTFS_ENCRYPTED; 191 | ECRYPTFS_ENCRYPTED);
224 } 192 }
225 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
226 lower_flags = file->f_flags; 194 ecryptfs_set_file_lower(
227 if ((lower_flags & O_ACCMODE) == O_WRONLY) 195 file, ecryptfs_inode_to_private(inode)->lower_file);
228 lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
229 if (file->f_flags & O_APPEND)
230 lower_flags &= ~O_APPEND;
231 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
232 /* Corresponding fput() in ecryptfs_release() */
233 if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
234 lower_flags))) {
235 ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
236 goto out_puts;
237 }
238 ecryptfs_set_file_lower(file, lower_file);
239 /* Isn't this check the same as the one in lookup? */
240 lower_inode = lower_dentry->d_inode;
241 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
242 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
243 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
@@ -247,7 +202,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
247 mutex_lock(&crypt_stat->cs_mutex); 202 mutex_lock(&crypt_stat->cs_mutex);
248 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) 203 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
249 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { 204 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
250 rc = ecryptfs_read_metadata(ecryptfs_dentry, lower_file); 205 rc = ecryptfs_read_metadata(ecryptfs_dentry);
251 if (rc) { 206 if (rc) {
252 ecryptfs_printk(KERN_DEBUG, 207 ecryptfs_printk(KERN_DEBUG,
253 "Valid headers not found\n"); 208 "Valid headers not found\n");
@@ -259,7 +214,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
259 "and plaintext passthrough mode is not " 214 "and plaintext passthrough mode is not "
260 "enabled; returning -EIO\n"); 215 "enabled; returning -EIO\n");
261 mutex_unlock(&crypt_stat->cs_mutex); 216 mutex_unlock(&crypt_stat->cs_mutex);
262 goto out_puts; 217 goto out_free;
263 } 218 }
264 rc = 0; 219 rc = 0;
265 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 220 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
@@ -271,11 +226,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
271 ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " 226 ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
272 "size: [0x%.16x]\n", inode, inode->i_ino, 227 "size: [0x%.16x]\n", inode, inode->i_ino,
273 i_size_read(inode)); 228 i_size_read(inode));
274 ecryptfs_set_file_lower(file, lower_file);
275 goto out; 229 goto out;
276out_puts: 230out_free:
277 mntput(lower_mnt);
278 dput(lower_dentry);
279 kmem_cache_free(ecryptfs_file_info_cache, 231 kmem_cache_free(ecryptfs_file_info_cache,
280 ecryptfs_file_to_private(file)); 232 ecryptfs_file_to_private(file));
281out: 233out:
@@ -295,19 +247,9 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
295 247
296static int ecryptfs_release(struct inode *inode, struct file *file) 248static int ecryptfs_release(struct inode *inode, struct file *file)
297{ 249{
298 struct file *lower_file = ecryptfs_file_to_lower(file); 250 kmem_cache_free(ecryptfs_file_info_cache,
299 struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); 251 ecryptfs_file_to_private(file));
300 struct inode *lower_inode = ecryptfs_inode_to_lower(inode); 252 return 0;
301 int rc;
302
303 if ((rc = ecryptfs_close_lower_file(lower_file))) {
304 printk(KERN_ERR "Error closing lower_file\n");
305 goto out;
306 }
307 inode->i_blocks = lower_inode->i_blocks;
308 kmem_cache_free(ecryptfs_file_info_cache, file_info);
309out:
310 return rc;
311} 253}
312 254
313static int 255static int
@@ -338,21 +280,6 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
338 return rc; 280 return rc;
339} 281}
340 282
341static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
342 struct pipe_inode_info *pipe, size_t count,
343 unsigned int flags)
344{
345 struct file *lower_file = NULL;
346 int rc = -EINVAL;
347
348 lower_file = ecryptfs_file_to_lower(file);
349 if (lower_file->f_op && lower_file->f_op->splice_read)
350 rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
351 count, flags);
352
353 return rc;
354}
355
356static int ecryptfs_ioctl(struct inode *inode, struct file *file, 283static int ecryptfs_ioctl(struct inode *inode, struct file *file,
357 unsigned int cmd, unsigned long arg); 284 unsigned int cmd, unsigned long arg);
358 285
@@ -365,7 +292,7 @@ const struct file_operations ecryptfs_dir_fops = {
365 .release = ecryptfs_release, 292 .release = ecryptfs_release,
366 .fsync = ecryptfs_fsync, 293 .fsync = ecryptfs_fsync,
367 .fasync = ecryptfs_fasync, 294 .fasync = ecryptfs_fasync,
368 .splice_read = ecryptfs_splice_read, 295 .splice_read = generic_file_splice_read,
369}; 296};
370 297
371const struct file_operations ecryptfs_main_fops = { 298const struct file_operations ecryptfs_main_fops = {
@@ -382,7 +309,7 @@ const struct file_operations ecryptfs_main_fops = {
382 .release = ecryptfs_release, 309 .release = ecryptfs_release,
383 .fsync = ecryptfs_fsync, 310 .fsync = ecryptfs_fsync,
384 .fasync = ecryptfs_fasync, 311 .fasync = ecryptfs_fasync,
385 .splice_read = ecryptfs_splice_read, 312 .splice_read = generic_file_splice_read,
386}; 313};
387 314
388static int 315static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 131954b3fb98..5701f816faf4 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -119,10 +119,23 @@ ecryptfs_do_create(struct inode *directory_inode,
119 } 119 }
120 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 120 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
121 ecryptfs_dentry, mode, nd); 121 ecryptfs_dentry, mode, nd);
122 if (unlikely(rc)) { 122 if (rc) {
123 ecryptfs_printk(KERN_ERR, 123 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
124 "Failure to create underlying file\n"); 124 struct ecryptfs_inode_info *inode_info =
125 goto out_lock; 125 ecryptfs_inode_to_private(ecryptfs_inode);
126
127 printk(KERN_WARNING "%s: Error creating underlying file; "
128 "rc = [%d]; checking for existing\n", __FUNCTION__, rc);
129 if (inode_info) {
130 mutex_lock(&inode_info->lower_file_mutex);
131 if (!inode_info->lower_file) {
132 mutex_unlock(&inode_info->lower_file_mutex);
133 printk(KERN_ERR "%s: Failure to set underlying "
134 "file; rc = [%d]\n", __FUNCTION__, rc);
135 goto out_lock;
136 }
137 mutex_unlock(&inode_info->lower_file_mutex);
138 }
126 } 139 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 140 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
128 directory_inode->i_sb, 0); 141 directory_inode->i_sb, 0);
@@ -140,39 +153,30 @@ out:
140 153
141/** 154/**
142 * grow_file 155 * grow_file
143 * @ecryptfs_dentry: the ecryptfs dentry 156 * @ecryptfs_dentry: the eCryptfs dentry
144 * @lower_file: The lower file
145 * @inode: The ecryptfs inode
146 * @lower_inode: The lower inode
147 * 157 *
148 * This is the code which will grow the file to its correct size. 158 * This is the code which will grow the file to its correct size.
149 */ 159 */
150static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file, 160static int grow_file(struct dentry *ecryptfs_dentry)
151 struct inode *inode, struct inode *lower_inode)
152{ 161{
153 int rc = 0; 162 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
154 struct file fake_file; 163 struct file fake_file;
155 struct ecryptfs_file_info tmp_file_info; 164 struct ecryptfs_file_info tmp_file_info;
165 char zero_virt[] = { 0x00 };
166 int rc = 0;
156 167
157 memset(&fake_file, 0, sizeof(fake_file)); 168 memset(&fake_file, 0, sizeof(fake_file));
158 fake_file.f_path.dentry = ecryptfs_dentry; 169 fake_file.f_path.dentry = ecryptfs_dentry;
159 memset(&tmp_file_info, 0, sizeof(tmp_file_info)); 170 memset(&tmp_file_info, 0, sizeof(tmp_file_info));
160 ecryptfs_set_file_private(&fake_file, &tmp_file_info); 171 ecryptfs_set_file_private(&fake_file, &tmp_file_info);
161 ecryptfs_set_file_lower(&fake_file, lower_file); 172 ecryptfs_set_file_lower(
162 rc = ecryptfs_fill_zeros(&fake_file, 1); 173 &fake_file,
163 if (rc) { 174 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file);
164 ecryptfs_inode_to_private(inode)->crypt_stat.flags |= 175 rc = ecryptfs_write(&fake_file, zero_virt, 0, 1);
165 ECRYPTFS_SECURITY_WARNING; 176 i_size_write(ecryptfs_inode, 0);
166 ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros " 177 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
167 "in file; rc = [%d]\n", rc); 178 ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |=
168 goto out; 179 ECRYPTFS_NEW_FILE;
169 }
170 i_size_write(inode, 0);
171 rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode,
172 inode, ecryptfs_dentry,
173 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
174 ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE;
175out:
176 return rc; 180 return rc;
177} 181}
178 182
@@ -186,51 +190,31 @@ out:
186 */ 190 */
187static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) 191static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
188{ 192{
193 struct ecryptfs_crypt_stat *crypt_stat =
194 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
189 int rc = 0; 195 int rc = 0;
190 int lower_flags;
191 struct ecryptfs_crypt_stat *crypt_stat;
192 struct dentry *lower_dentry;
193 struct file *lower_file;
194 struct inode *inode, *lower_inode;
195 struct vfsmount *lower_mnt;
196 196
197 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
198 ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
199 lower_dentry->d_name.name);
200 inode = ecryptfs_dentry->d_inode;
201 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
202 lower_flags = ((O_CREAT | O_TRUNC) & O_ACCMODE) | O_RDWR;
203 lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
204 /* Corresponding fput() at end of this function */
205 if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
206 lower_flags))) {
207 ecryptfs_printk(KERN_ERR,
208 "Error opening dentry; rc = [%i]\n", rc);
209 goto out;
210 }
211 lower_inode = lower_dentry->d_inode;
212 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 197 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
213 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 198 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
214 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
215 goto out_fput; 200 goto out;
216 } 201 }
217 crypt_stat->flags |= ECRYPTFS_NEW_FILE; 202 crypt_stat->flags |= ECRYPTFS_NEW_FILE;
218 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); 203 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
219 rc = ecryptfs_new_file_context(ecryptfs_dentry); 204 rc = ecryptfs_new_file_context(ecryptfs_dentry);
220 if (rc) { 205 if (rc) {
221 ecryptfs_printk(KERN_DEBUG, "Error creating new file " 206 ecryptfs_printk(KERN_ERR, "Error creating new file "
222 "context\n"); 207 "context; rc = [%d]\n", rc);
223 goto out_fput; 208 goto out;
224 } 209 }
225 rc = ecryptfs_write_metadata(ecryptfs_dentry, lower_file); 210 rc = ecryptfs_write_metadata(ecryptfs_dentry);
226 if (rc) { 211 if (rc) {
227 ecryptfs_printk(KERN_DEBUG, "Error writing headers\n"); 212 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
228 goto out_fput; 213 goto out;
229 } 214 }
230 rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode); 215 rc = grow_file(ecryptfs_dentry);
231out_fput: 216 if (rc)
232 if ((rc = ecryptfs_close_lower_file(lower_file))) 217 printk(KERN_ERR "Error growing file; rc = [%d]\n", rc);
233 printk(KERN_ERR "Error closing lower_file\n");
234out: 218out:
235 return rc; 219 return rc;
236} 220}
@@ -252,6 +236,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
252{ 236{
253 int rc; 237 int rc;
254 238
239 /* ecryptfs_do_create() calls ecryptfs_interpose(), which opens
240 * the crypt_stat->lower_file (persistent file) */
255 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); 241 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
256 if (unlikely(rc)) { 242 if (unlikely(rc)) {
257 ecryptfs_printk(KERN_WARNING, "Failed to create file in" 243 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
@@ -374,8 +360,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
374 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 360 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
375 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 361 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
376 ecryptfs_set_default_sizes(crypt_stat); 362 ecryptfs_set_default_sizes(crypt_stat);
377 rc = ecryptfs_read_and_validate_header_region(page_virt, lower_dentry, 363 rc = ecryptfs_read_and_validate_header_region(page_virt,
378 nd->mnt); 364 dentry->d_inode);
379 if (rc) { 365 if (rc) {
380 rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry); 366 rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry);
381 if (rc) { 367 if (rc) {
@@ -392,7 +378,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
392 dentry->d_sb)->mount_crypt_stat; 378 dentry->d_sb)->mount_crypt_stat;
393 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { 379 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
394 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 380 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
395 file_size = (crypt_stat->header_extent_size 381 file_size = ((crypt_stat->extent_size
382 * crypt_stat->num_header_extents_at_front)
396 + i_size_read(lower_dentry->d_inode)); 383 + i_size_read(lower_dentry->d_inode));
397 else 384 else
398 file_size = i_size_read(lower_dentry->d_inode); 385 file_size = i_size_read(lower_dentry->d_inode);
@@ -722,8 +709,8 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
722{ 709{
723 loff_t lower_size; 710 loff_t lower_size;
724 711
725 lower_size = ( crypt_stat->header_extent_size 712 lower_size = (crypt_stat->extent_size
726 * crypt_stat->num_header_extents_at_front ); 713 * crypt_stat->num_header_extents_at_front);
727 if (upper_size != 0) { 714 if (upper_size != 0) {
728 loff_t num_extents; 715 loff_t num_extents;
729 716
@@ -752,8 +739,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
752 int rc = 0; 739 int rc = 0;
753 struct inode *inode = dentry->d_inode; 740 struct inode *inode = dentry->d_inode;
754 struct dentry *lower_dentry; 741 struct dentry *lower_dentry;
755 struct vfsmount *lower_mnt; 742 struct file fake_ecryptfs_file;
756 struct file fake_ecryptfs_file, *lower_file = NULL;
757 struct ecryptfs_crypt_stat *crypt_stat; 743 struct ecryptfs_crypt_stat *crypt_stat;
758 loff_t i_size = i_size_read(inode); 744 loff_t i_size = i_size_read(inode);
759 loff_t lower_size_before_truncate; 745 loff_t lower_size_before_truncate;
@@ -776,62 +762,52 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
776 goto out; 762 goto out;
777 } 763 }
778 lower_dentry = ecryptfs_dentry_to_lower(dentry); 764 lower_dentry = ecryptfs_dentry_to_lower(dentry);
779 /* This dget & mntget is released through fput at out_fput: */ 765 ecryptfs_set_file_lower(
780 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 766 &fake_ecryptfs_file,
781 if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, 767 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
782 O_RDWR))) {
783 ecryptfs_printk(KERN_ERR,
784 "Error opening dentry; rc = [%i]\n", rc);
785 goto out_free;
786 }
787 ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
788 /* Switch on growing or shrinking file */ 768 /* Switch on growing or shrinking file */
789 if (new_length > i_size) { 769 if (new_length > i_size) {
790 rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length); 770 char zero[] = { 0x00 };
791 if (rc) { 771
792 ecryptfs_printk(KERN_ERR, 772 /* Write a single 0 at the last position of the file;
793 "Problem with fill_zeros\n"); 773 * this triggers code that will fill in 0's throughout
794 goto out_fput; 774 * the intermediate portion of the previous end of the
795 } 775 * file and the new and of the file */
796 i_size_write(inode, new_length); 776 rc = ecryptfs_write(&fake_ecryptfs_file, zero,
797 rc = ecryptfs_write_inode_size_to_metadata( 777 (new_length - 1), 1);
798 lower_file, lower_dentry->d_inode, inode, dentry,
799 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
800 if (rc) {
801 printk(KERN_ERR "Problem with "
802 "ecryptfs_write_inode_size_to_metadata; "
803 "rc = [%d]\n", rc);
804 goto out_fput;
805 }
806 } else { /* new_length < i_size_read(inode) */ 778 } else { /* new_length < i_size_read(inode) */
807 pgoff_t index = 0; 779 /* We're chopping off all the pages down do the page
808 int end_pos_in_page = -1; 780 * in which new_length is located. Fill in the end of
781 * that page from (new_length & ~PAGE_CACHE_MASK) to
782 * PAGE_CACHE_SIZE with zeros. */
783 size_t num_zeros = (PAGE_CACHE_SIZE
784 - (new_length & ~PAGE_CACHE_MASK));
809 785
810 if (new_length != 0) { 786 if (num_zeros) {
811 index = ((new_length - 1) >> PAGE_CACHE_SHIFT); 787 char *zeros_virt;
812 end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); 788
813 } 789 zeros_virt = kzalloc(num_zeros, GFP_KERNEL);
814 if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) { 790 if (!zeros_virt) {
815 if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file, 791 rc = -ENOMEM;
816 index, 792 goto out_free;
817 (end_pos_in_page + 1), 793 }
818 ((PAGE_CACHE_SIZE - 1) 794 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
819 - end_pos_in_page)))) { 795 new_length, num_zeros);
796 kfree(zeros_virt);
797 if (rc) {
820 printk(KERN_ERR "Error attempting to zero out " 798 printk(KERN_ERR "Error attempting to zero out "
821 "the remainder of the end page on " 799 "the remainder of the end page on "
822 "reducing truncate; rc = [%d]\n", rc); 800 "reducing truncate; rc = [%d]\n", rc);
823 goto out_fput; 801 goto out_free;
824 } 802 }
825 } 803 }
826 vmtruncate(inode, new_length); 804 vmtruncate(inode, new_length);
827 rc = ecryptfs_write_inode_size_to_metadata( 805 rc = ecryptfs_write_inode_size_to_metadata(inode);
828 lower_file, lower_dentry->d_inode, inode, dentry,
829 ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
830 if (rc) { 806 if (rc) {
831 printk(KERN_ERR "Problem with " 807 printk(KERN_ERR "Problem with "
832 "ecryptfs_write_inode_size_to_metadata; " 808 "ecryptfs_write_inode_size_to_metadata; "
833 "rc = [%d]\n", rc); 809 "rc = [%d]\n", rc);
834 goto out_fput; 810 goto out_free;
835 } 811 }
836 /* We are reducing the size of the ecryptfs file, and need to 812 /* We are reducing the size of the ecryptfs file, and need to
837 * know if we need to reduce the size of the lower file. */ 813 * know if we need to reduce the size of the lower file. */
@@ -843,13 +819,6 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
843 vmtruncate(lower_dentry->d_inode, 819 vmtruncate(lower_dentry->d_inode,
844 lower_size_after_truncate); 820 lower_size_after_truncate);
845 } 821 }
846 /* Update the access times */
847 lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
848 = CURRENT_TIME;
849 mark_inode_dirty_sync(inode);
850out_fput:
851 if ((rc = ecryptfs_close_lower_file(lower_file)))
852 printk(KERN_ERR "Error closing lower_file\n");
853out_free: 822out_free:
854 if (ecryptfs_file_to_private(&fake_ecryptfs_file)) 823 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
855 kmem_cache_free(ecryptfs_file_info_cache, 824 kmem_cache_free(ecryptfs_file_info_cache,
@@ -909,23 +878,12 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
909 else if (S_ISREG(dentry->d_inode->i_mode) 878 else if (S_ISREG(dentry->d_inode->i_mode)
910 && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) 879 && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
911 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) { 880 || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) {
912 struct vfsmount *lower_mnt;
913 struct file *lower_file = NULL;
914 struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 881 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
915 int lower_flags; 882
916
917 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
918 lower_flags = O_RDONLY;
919 if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry,
920 lower_mnt, lower_flags))) {
921 printk(KERN_ERR
922 "Error opening lower file; rc = [%d]\n", rc);
923 mutex_unlock(&crypt_stat->cs_mutex);
924 goto out;
925 }
926 mount_crypt_stat = &ecryptfs_superblock_to_private( 883 mount_crypt_stat = &ecryptfs_superblock_to_private(
927 dentry->d_sb)->mount_crypt_stat; 884 dentry->d_sb)->mount_crypt_stat;
928 if ((rc = ecryptfs_read_metadata(dentry, lower_file))) { 885 rc = ecryptfs_read_metadata(dentry);
886 if (rc) {
929 if (!(mount_crypt_stat->flags 887 if (!(mount_crypt_stat->flags
930 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 888 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
931 rc = -EIO; 889 rc = -EIO;
@@ -935,16 +893,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
935 "enabled; returning -EIO\n"); 893 "enabled; returning -EIO\n");
936 894
937 mutex_unlock(&crypt_stat->cs_mutex); 895 mutex_unlock(&crypt_stat->cs_mutex);
938 fput(lower_file);
939 goto out; 896 goto out;
940 } 897 }
941 rc = 0; 898 rc = 0;
942 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 899 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
943 mutex_unlock(&crypt_stat->cs_mutex); 900 mutex_unlock(&crypt_stat->cs_mutex);
944 fput(lower_file);
945 goto out; 901 goto out;
946 } 902 }
947 fput(lower_file);
948 } 903 }
949 mutex_unlock(&crypt_stat->cs_mutex); 904 mutex_unlock(&crypt_stat->cs_mutex);
950 if (ia->ia_valid & ATTR_SIZE) { 905 if (ia->ia_valid & ATTR_SIZE) {
@@ -986,13 +941,11 @@ out:
986} 941}
987 942
988ssize_t 943ssize_t
989ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, 944ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
990 size_t size) 945 void *value, size_t size)
991{ 946{
992 int rc = 0; 947 int rc = 0;
993 struct dentry *lower_dentry;
994 948
995 lower_dentry = ecryptfs_dentry_to_lower(dentry);
996 if (!lower_dentry->d_inode->i_op->getxattr) { 949 if (!lower_dentry->d_inode->i_op->getxattr) {
997 rc = -ENOSYS; 950 rc = -ENOSYS;
998 goto out; 951 goto out;
@@ -1005,6 +958,14 @@ out:
1005 return rc; 958 return rc;
1006} 959}
1007 960
961ssize_t
962ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
963 size_t size)
964{
965 return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), name,
966 value, size);
967}
968
1008static ssize_t 969static ssize_t
1009ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) 970ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
1010{ 971{
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index b550dea8eee6..89d9710dd63d 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -39,7 +39,7 @@
39 * determine the type of error, make appropriate log entries, and 39 * determine the type of error, make appropriate log entries, and
40 * return an error code. 40 * return an error code.
41 */ 41 */
42int process_request_key_err(long err_code) 42static int process_request_key_err(long err_code)
43{ 43{
44 int rc = 0; 44 int rc = 0;
45 45
@@ -71,7 +71,7 @@ int process_request_key_err(long err_code)
71 * address; zero on error 71 * address; zero on error
72 * @length_size: The number of bytes occupied by the encoded length 72 * @length_size: The number of bytes occupied by the encoded length
73 * 73 *
74 * Returns Zero on success 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76static int parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
@@ -106,11 +106,11 @@ out:
106 106
107/** 107/**
108 * write_packet_length 108 * write_packet_length
109 * @dest: The byte array target into which to write the 109 * @dest: The byte array target into which to write the length. Must
110 * length. Must have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
112 * @packet_size_length: The number of bytes used to encode the 112 * @packet_size_length: The number of bytes used to encode the packet
113 * packet length is written to this address. 113 * length is written to this address.
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
@@ -396,26 +396,53 @@ out:
396 return rc; 396 return rc;
397} 397}
398 398
399static int
400ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok)
401{
402 int rc = 0;
403
404 (*sig) = NULL;
405 switch (auth_tok->token_type) {
406 case ECRYPTFS_PASSWORD:
407 (*sig) = auth_tok->token.password.signature;
408 break;
409 case ECRYPTFS_PRIVATE_KEY:
410 (*sig) = auth_tok->token.private_key.signature;
411 break;
412 default:
413 printk(KERN_ERR "Cannot get sig for auth_tok of type [%d]\n",
414 auth_tok->token_type);
415 rc = -EINVAL;
416 }
417 return rc;
418}
419
399/** 420/**
400 * decrypt_pki_encrypted_session_key - Decrypt the session key with 421 * decrypt_pki_encrypted_session_key - Decrypt the session key with the given auth_tok.
401 * the given auth_tok. 422 * @auth_tok: The key authentication token used to decrypt the session key
423 * @crypt_stat: The cryptographic context
402 * 424 *
403 * Returns Zero on success; non-zero error otherwise. 425 * Returns zero on success; non-zero error otherwise.
404 */ 426 */
405static int decrypt_pki_encrypted_session_key( 427static int
406 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 428decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
407 struct ecryptfs_auth_tok *auth_tok, 429 struct ecryptfs_crypt_stat *crypt_stat)
408 struct ecryptfs_crypt_stat *crypt_stat)
409{ 430{
410 u16 cipher_code = 0; 431 u16 cipher_code = 0;
411 struct ecryptfs_msg_ctx *msg_ctx; 432 struct ecryptfs_msg_ctx *msg_ctx;
412 struct ecryptfs_message *msg = NULL; 433 struct ecryptfs_message *msg = NULL;
434 char *auth_tok_sig;
413 char *netlink_message; 435 char *netlink_message;
414 size_t netlink_message_length; 436 size_t netlink_message_length;
415 int rc; 437 int rc;
416 438
417 rc = write_tag_64_packet(mount_crypt_stat->global_auth_tok_sig, 439 rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
418 &(auth_tok->session_key), 440 if (rc) {
441 printk(KERN_ERR "Unrecognized auth tok type: [%d]\n",
442 auth_tok->token_type);
443 goto out;
444 }
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
419 &netlink_message, &netlink_message_length); 446 &netlink_message, &netlink_message_length);
420 if (rc) { 447 if (rc) {
421 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet");
@@ -465,40 +492,33 @@ out:
465 492
466static void wipe_auth_tok_list(struct list_head *auth_tok_list_head) 493static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
467{ 494{
468 struct list_head *walker;
469 struct ecryptfs_auth_tok_list_item *auth_tok_list_item; 495 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
496 struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp;
470 497
471 walker = auth_tok_list_head->next; 498 list_for_each_entry_safe(auth_tok_list_item, auth_tok_list_item_tmp,
472 while (walker != auth_tok_list_head) { 499 auth_tok_list_head, list) {
473 auth_tok_list_item = 500 list_del(&auth_tok_list_item->list);
474 list_entry(walker, struct ecryptfs_auth_tok_list_item,
475 list);
476 walker = auth_tok_list_item->list.next;
477 memset(auth_tok_list_item, 0,
478 sizeof(struct ecryptfs_auth_tok_list_item));
479 kmem_cache_free(ecryptfs_auth_tok_list_item_cache, 501 kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
480 auth_tok_list_item); 502 auth_tok_list_item);
481 } 503 }
482 auth_tok_list_head->next = NULL;
483} 504}
484 505
485struct kmem_cache *ecryptfs_auth_tok_list_item_cache; 506struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
486 507
487
488/** 508/**
489 * parse_tag_1_packet 509 * parse_tag_1_packet
490 * @crypt_stat: The cryptographic context to modify based on packet 510 * @crypt_stat: The cryptographic context to modify based on packet contents
491 * contents.
492 * @data: The raw bytes of the packet. 511 * @data: The raw bytes of the packet.
493 * @auth_tok_list: eCryptfs parses packets into authentication tokens; 512 * @auth_tok_list: eCryptfs parses packets into authentication tokens;
494 * a new authentication token will be placed at the end 513 * a new authentication token will be placed at the
495 * of this list for this packet. 514 * end of this list for this packet.
496 * @new_auth_tok: Pointer to a pointer to memory that this function 515 * @new_auth_tok: Pointer to a pointer to memory that this function
497 * allocates; sets the memory address of the pointer to 516 * allocates; sets the memory address of the pointer to
498 * NULL on error. This object is added to the 517 * NULL on error. This object is added to the
499 * auth_tok_list. 518 * auth_tok_list.
500 * @packet_size: This function writes the size of the parsed packet 519 * @packet_size: This function writes the size of the parsed packet
501 * into this memory location; zero on error. 520 * into this memory location; zero on error.
521 * @max_packet_size: The maximum allowable packet size
502 * 522 *
503 * Returns zero on success; non-zero on error. 523 * Returns zero on success; non-zero on error.
504 */ 524 */
@@ -515,72 +535,65 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
515 535
516 (*packet_size) = 0; 536 (*packet_size) = 0;
517 (*new_auth_tok) = NULL; 537 (*new_auth_tok) = NULL;
518 538 /**
519 /* we check that: 539 * This format is inspired by OpenPGP; see RFC 2440
520 * one byte for the Tag 1 ID flag 540 * packet tag 1
521 * two bytes for the body size 541 *
522 * do not exceed the maximum_packet_size 542 * Tag 1 identifier (1 byte)
543 * Max Tag 1 packet size (max 3 bytes)
544 * Version (1 byte)
545 * Key identifier (8 bytes; ECRYPTFS_SIG_SIZE)
546 * Cipher identifier (1 byte)
547 * Encrypted key size (arbitrary)
548 *
549 * 12 bytes minimum packet size
523 */ 550 */
524 if (unlikely((*packet_size) + 3 > max_packet_size)) { 551 if (unlikely(max_packet_size < 12)) {
525 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 552 printk(KERN_ERR "Invalid max packet size; must be >=12\n");
526 rc = -EINVAL; 553 rc = -EINVAL;
527 goto out; 554 goto out;
528 } 555 }
529 /* check for Tag 1 identifier - one byte */
530 if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { 556 if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) {
531 ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", 557 printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n",
532 ECRYPTFS_TAG_1_PACKET_TYPE); 558 ECRYPTFS_TAG_1_PACKET_TYPE);
533 rc = -EINVAL; 559 rc = -EINVAL;
534 goto out; 560 goto out;
535 } 561 }
536 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or 562 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
537 * at end of function upon failure */ 563 * at end of function upon failure */
538 auth_tok_list_item = 564 auth_tok_list_item =
539 kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, 565 kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache,
540 GFP_KERNEL); 566 GFP_KERNEL);
541 if (!auth_tok_list_item) { 567 if (!auth_tok_list_item) {
542 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); 568 printk(KERN_ERR "Unable to allocate memory\n");
543 rc = -ENOMEM; 569 rc = -ENOMEM;
544 goto out; 570 goto out;
545 } 571 }
546 memset(auth_tok_list_item, 0,
547 sizeof(struct ecryptfs_auth_tok_list_item));
548 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 572 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
549 /* check for body size - one to two bytes
550 *
551 * ***** TAG 1 Packet Format *****
552 * | version number | 1 byte |
553 * | key ID | 8 bytes |
554 * | public key algorithm | 1 byte |
555 * | encrypted session key | arbitrary |
556 */
557 rc = parse_packet_length(&data[(*packet_size)], &body_size, 573 rc = parse_packet_length(&data[(*packet_size)], &body_size,
558 &length_size); 574 &length_size);
559 if (rc) { 575 if (rc) {
560 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 576 printk(KERN_WARNING "Error parsing packet length; "
561 "rc = [%d]\n", rc); 577 "rc = [%d]\n", rc);
562 goto out_free; 578 goto out_free;
563 } 579 }
564 if (unlikely(body_size < (0x02 + ECRYPTFS_SIG_SIZE))) { 580 if (unlikely(body_size < (ECRYPTFS_SIG_SIZE + 2))) {
565 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", 581 printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
566 body_size);
567 rc = -EINVAL; 582 rc = -EINVAL;
568 goto out_free; 583 goto out_free;
569 } 584 }
570 (*packet_size) += length_size; 585 (*packet_size) += length_size;
571 if (unlikely((*packet_size) + body_size > max_packet_size)) { 586 if (unlikely((*packet_size) + body_size > max_packet_size)) {
572 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 587 printk(KERN_WARNING "Packet size exceeds max\n");
573 rc = -EINVAL; 588 rc = -EINVAL;
574 goto out_free; 589 goto out_free;
575 } 590 }
576 /* Version 3 (from RFC2440) - one byte */
577 if (unlikely(data[(*packet_size)++] != 0x03)) { 591 if (unlikely(data[(*packet_size)++] != 0x03)) {
578 ecryptfs_printk(KERN_DEBUG, "Unknown version number " 592 printk(KERN_WARNING "Unknown version number [%d]\n",
579 "[%d]\n", data[(*packet_size) - 1]); 593 data[(*packet_size) - 1]);
580 rc = -EINVAL; 594 rc = -EINVAL;
581 goto out_free; 595 goto out_free;
582 } 596 }
583 /* Read Signature */
584 ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature, 597 ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature,
585 &data[(*packet_size)], ECRYPTFS_SIG_SIZE); 598 &data[(*packet_size)], ECRYPTFS_SIG_SIZE);
586 *packet_size += ECRYPTFS_SIG_SIZE; 599 *packet_size += ECRYPTFS_SIG_SIZE;
@@ -588,27 +601,23 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
588 * know which public key encryption algorithm was used */ 601 * know which public key encryption algorithm was used */
589 (*packet_size)++; 602 (*packet_size)++;
590 (*new_auth_tok)->session_key.encrypted_key_size = 603 (*new_auth_tok)->session_key.encrypted_key_size =
591 body_size - (0x02 + ECRYPTFS_SIG_SIZE); 604 body_size - (ECRYPTFS_SIG_SIZE + 2);
592 if ((*new_auth_tok)->session_key.encrypted_key_size 605 if ((*new_auth_tok)->session_key.encrypted_key_size
593 > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 606 > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
594 ecryptfs_printk(KERN_ERR, "Tag 1 packet contains key larger " 607 printk(KERN_WARNING "Tag 1 packet contains key larger "
595 "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); 608 "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
596 rc = -EINVAL; 609 rc = -EINVAL;
597 goto out; 610 goto out;
598 } 611 }
599 ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
600 (*new_auth_tok)->session_key.encrypted_key_size);
601 memcpy((*new_auth_tok)->session_key.encrypted_key, 612 memcpy((*new_auth_tok)->session_key.encrypted_key,
602 &data[(*packet_size)], (body_size - 0x02 - ECRYPTFS_SIG_SIZE)); 613 &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2)));
603 (*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size; 614 (*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size;
604 (*new_auth_tok)->session_key.flags &= 615 (*new_auth_tok)->session_key.flags &=
605 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; 616 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
606 (*new_auth_tok)->session_key.flags |= 617 (*new_auth_tok)->session_key.flags |=
607 ECRYPTFS_CONTAINS_ENCRYPTED_KEY; 618 ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
608 (*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY; 619 (*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY;
609 (*new_auth_tok)->flags |= ECRYPTFS_PRIVATE_KEY; 620 (*new_auth_tok)->flags = 0;
610 /* TODO: Why are we setting this flag here? Don't we want the
611 * userspace to decrypt the session key? */
612 (*new_auth_tok)->session_key.flags &= 621 (*new_auth_tok)->session_key.flags &=
613 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT); 622 ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
614 (*new_auth_tok)->session_key.flags &= 623 (*new_auth_tok)->session_key.flags &=
@@ -658,22 +667,30 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
658 667
659 (*packet_size) = 0; 668 (*packet_size) = 0;
660 (*new_auth_tok) = NULL; 669 (*new_auth_tok) = NULL;
661 670 /**
662 /* we check that: 671 *This format is inspired by OpenPGP; see RFC 2440
663 * one byte for the Tag 3 ID flag 672 * packet tag 3
664 * two bytes for the body size 673 *
665 * do not exceed the maximum_packet_size 674 * Tag 3 identifier (1 byte)
675 * Max Tag 3 packet size (max 3 bytes)
676 * Version (1 byte)
677 * Cipher code (1 byte)
678 * S2K specifier (1 byte)
679 * Hash identifier (1 byte)
680 * Salt (ECRYPTFS_SALT_SIZE)
681 * Hash iterations (1 byte)
682 * Encrypted key (arbitrary)
683 *
684 * (ECRYPTFS_SALT_SIZE + 7) minimum packet size
666 */ 685 */
667 if (unlikely((*packet_size) + 3 > max_packet_size)) { 686 if (max_packet_size < (ECRYPTFS_SALT_SIZE + 7)) {
668 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 687 printk(KERN_ERR "Max packet size too large\n");
669 rc = -EINVAL; 688 rc = -EINVAL;
670 goto out; 689 goto out;
671 } 690 }
672
673 /* check for Tag 3 identifyer - one byte */
674 if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) { 691 if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
675 ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", 692 printk(KERN_ERR "First byte != 0x%.2x; invalid packet\n",
676 ECRYPTFS_TAG_3_PACKET_TYPE); 693 ECRYPTFS_TAG_3_PACKET_TYPE);
677 rc = -EINVAL; 694 rc = -EINVAL;
678 goto out; 695 goto out;
679 } 696 }
@@ -682,56 +699,37 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
682 auth_tok_list_item = 699 auth_tok_list_item =
683 kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL); 700 kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
684 if (!auth_tok_list_item) { 701 if (!auth_tok_list_item) {
685 ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); 702 printk(KERN_ERR "Unable to allocate memory\n");
686 rc = -ENOMEM; 703 rc = -ENOMEM;
687 goto out; 704 goto out;
688 } 705 }
689 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 706 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
690
691 /* check for body size - one to two bytes */
692 rc = parse_packet_length(&data[(*packet_size)], &body_size, 707 rc = parse_packet_length(&data[(*packet_size)], &body_size,
693 &length_size); 708 &length_size);
694 if (rc) { 709 if (rc) {
695 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
696 "rc = [%d]\n", rc); 711 rc);
697 goto out_free; 712 goto out_free;
698 } 713 }
699 if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) { 714 if (unlikely(body_size < (ECRYPTFS_SALT_SIZE + 5))) {
700 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", 715 printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
701 body_size);
702 rc = -EINVAL; 716 rc = -EINVAL;
703 goto out_free; 717 goto out_free;
704 } 718 }
705 (*packet_size) += length_size; 719 (*packet_size) += length_size;
706
707 /* now we know the length of the remainting Tag 3 packet size:
708 * 5 fix bytes for: version string, cipher, S2K ID, hash algo,
709 * number of hash iterations
710 * ECRYPTFS_SALT_SIZE bytes for salt
711 * body_size bytes minus the stuff above is the encrypted key size
712 */
713 if (unlikely((*packet_size) + body_size > max_packet_size)) { 720 if (unlikely((*packet_size) + body_size > max_packet_size)) {
714 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 721 printk(KERN_ERR "Packet size exceeds max\n");
715 rc = -EINVAL; 722 rc = -EINVAL;
716 goto out_free; 723 goto out_free;
717 } 724 }
718
719 /* There are 5 characters of additional information in the
720 * packet */
721 (*new_auth_tok)->session_key.encrypted_key_size = 725 (*new_auth_tok)->session_key.encrypted_key_size =
722 body_size - (0x05 + ECRYPTFS_SALT_SIZE); 726 (body_size - (ECRYPTFS_SALT_SIZE + 5));
723 ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
724 (*new_auth_tok)->session_key.encrypted_key_size);
725
726 /* Version 4 (from RFC2440) - one byte */
727 if (unlikely(data[(*packet_size)++] != 0x04)) { 727 if (unlikely(data[(*packet_size)++] != 0x04)) {
728 ecryptfs_printk(KERN_DEBUG, "Unknown version number " 728 printk(KERN_WARNING "Unknown version number [%d]\n",
729 "[%d]\n", data[(*packet_size) - 1]); 729 data[(*packet_size) - 1]);
730 rc = -EINVAL; 730 rc = -EINVAL;
731 goto out_free; 731 goto out_free;
732 } 732 }
733
734 /* cipher - one byte */
735 ecryptfs_cipher_code_to_string(crypt_stat->cipher, 733 ecryptfs_cipher_code_to_string(crypt_stat->cipher,
736 (u16)data[(*packet_size)]); 734 (u16)data[(*packet_size)]);
737 /* A little extra work to differentiate among the AES key 735 /* A little extra work to differentiate among the AES key
@@ -745,33 +743,26 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
745 (*new_auth_tok)->session_key.encrypted_key_size; 743 (*new_auth_tok)->session_key.encrypted_key_size;
746 } 744 }
747 ecryptfs_init_crypt_ctx(crypt_stat); 745 ecryptfs_init_crypt_ctx(crypt_stat);
748 /* S2K identifier 3 (from RFC2440) */
749 if (unlikely(data[(*packet_size)++] != 0x03)) { 746 if (unlikely(data[(*packet_size)++] != 0x03)) {
750 ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently " 747 printk(KERN_WARNING "Only S2K ID 3 is currently supported\n");
751 "supported\n");
752 rc = -ENOSYS; 748 rc = -ENOSYS;
753 goto out_free; 749 goto out_free;
754 } 750 }
755
756 /* TODO: finish the hash mapping */ 751 /* TODO: finish the hash mapping */
757 /* hash algorithm - one byte */
758 switch (data[(*packet_size)++]) { 752 switch (data[(*packet_size)++]) {
759 case 0x01: /* See RFC2440 for these numbers and their mappings */ 753 case 0x01: /* See RFC2440 for these numbers and their mappings */
760 /* Choose MD5 */ 754 /* Choose MD5 */
761 /* salt - ECRYPTFS_SALT_SIZE bytes */
762 memcpy((*new_auth_tok)->token.password.salt, 755 memcpy((*new_auth_tok)->token.password.salt,
763 &data[(*packet_size)], ECRYPTFS_SALT_SIZE); 756 &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
764 (*packet_size) += ECRYPTFS_SALT_SIZE; 757 (*packet_size) += ECRYPTFS_SALT_SIZE;
765
766 /* This conversion was taken straight from RFC2440 */ 758 /* This conversion was taken straight from RFC2440 */
767 /* number of hash iterations - one byte */
768 (*new_auth_tok)->token.password.hash_iterations = 759 (*new_auth_tok)->token.password.hash_iterations =
769 ((u32) 16 + (data[(*packet_size)] & 15)) 760 ((u32) 16 + (data[(*packet_size)] & 15))
770 << ((data[(*packet_size)] >> 4) + 6); 761 << ((data[(*packet_size)] >> 4) + 6);
771 (*packet_size)++; 762 (*packet_size)++;
772 763 /* Friendly reminder:
773 /* encrypted session key - 764 * (*new_auth_tok)->session_key.encrypted_key_size =
774 * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */ 765 * (body_size - (ECRYPTFS_SALT_SIZE + 5)); */
775 memcpy((*new_auth_tok)->session_key.encrypted_key, 766 memcpy((*new_auth_tok)->session_key.encrypted_key,
776 &data[(*packet_size)], 767 &data[(*packet_size)],
777 (*new_auth_tok)->session_key.encrypted_key_size); 768 (*new_auth_tok)->session_key.encrypted_key_size);
@@ -781,7 +772,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
781 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; 772 ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
782 (*new_auth_tok)->session_key.flags |= 773 (*new_auth_tok)->session_key.flags |=
783 ECRYPTFS_CONTAINS_ENCRYPTED_KEY; 774 ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
784 (*new_auth_tok)->token.password.hash_algo = 0x01; 775 (*new_auth_tok)->token.password.hash_algo = 0x01; /* MD5 */
785 break; 776 break;
786 default: 777 default:
787 ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: " 778 ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
@@ -837,82 +828,61 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
837 828
838 (*packet_size) = 0; 829 (*packet_size) = 0;
839 (*tag_11_contents_size) = 0; 830 (*tag_11_contents_size) = 0;
840 831 /* This format is inspired by OpenPGP; see RFC 2440
841 /* check that: 832 * packet tag 11
842 * one byte for the Tag 11 ID flag 833 *
843 * two bytes for the Tag 11 length 834 * Tag 11 identifier (1 byte)
844 * do not exceed the maximum_packet_size 835 * Max Tag 11 packet size (max 3 bytes)
836 * Binary format specifier (1 byte)
837 * Filename length (1 byte)
838 * Filename ("_CONSOLE") (8 bytes)
839 * Modification date (4 bytes)
840 * Literal data (arbitrary)
841 *
842 * We need at least 16 bytes of data for the packet to even be
843 * valid.
845 */ 844 */
846 if (unlikely((*packet_size) + 3 > max_packet_size)) { 845 if (max_packet_size < 16) {
847 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 846 printk(KERN_ERR "Maximum packet size too small\n");
848 rc = -EINVAL; 847 rc = -EINVAL;
849 goto out; 848 goto out;
850 } 849 }
851
852 /* check for Tag 11 identifyer - one byte */
853 if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) { 850 if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
854 ecryptfs_printk(KERN_WARNING, 851 printk(KERN_WARNING "Invalid tag 11 packet format\n");
855 "Invalid tag 11 packet format\n");
856 rc = -EINVAL; 852 rc = -EINVAL;
857 goto out; 853 goto out;
858 } 854 }
859
860 /* get Tag 11 content length - one or two bytes */
861 rc = parse_packet_length(&data[(*packet_size)], &body_size, 855 rc = parse_packet_length(&data[(*packet_size)], &body_size,
862 &length_size); 856 &length_size);
863 if (rc) { 857 if (rc) {
864 ecryptfs_printk(KERN_WARNING, 858 printk(KERN_WARNING "Invalid tag 11 packet format\n");
865 "Invalid tag 11 packet format\n");
866 goto out; 859 goto out;
867 } 860 }
868 (*packet_size) += length_size; 861 if (body_size < 14) {
869 862 printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
870 if (body_size < 13) {
871 ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
872 body_size);
873 rc = -EINVAL; 863 rc = -EINVAL;
874 goto out; 864 goto out;
875 } 865 }
876 /* We have 13 bytes of surrounding packet values */ 866 (*packet_size) += length_size;
877 (*tag_11_contents_size) = (body_size - 13); 867 (*tag_11_contents_size) = (body_size - 14);
878
879 /* now we know the length of the remainting Tag 11 packet size:
880 * 14 fix bytes for: special flag one, special flag two,
881 * 12 skipped bytes
882 * body_size bytes minus the stuff above is the Tag 11 content
883 */
884 /* FIXME why is the body size one byte smaller than the actual
885 * size of the body?
886 * this seems to be an error here as well as in
887 * write_tag_11_packet() */
888 if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) { 868 if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
889 ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); 869 printk(KERN_ERR "Packet size exceeds max\n");
890 rc = -EINVAL; 870 rc = -EINVAL;
891 goto out; 871 goto out;
892 } 872 }
893
894 /* special flag one - one byte */
895 if (data[(*packet_size)++] != 0x62) { 873 if (data[(*packet_size)++] != 0x62) {
896 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); 874 printk(KERN_WARNING "Unrecognizable packet\n");
897 rc = -EINVAL; 875 rc = -EINVAL;
898 goto out; 876 goto out;
899 } 877 }
900
901 /* special flag two - one byte */
902 if (data[(*packet_size)++] != 0x08) { 878 if (data[(*packet_size)++] != 0x08) {
903 ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); 879 printk(KERN_WARNING "Unrecognizable packet\n");
904 rc = -EINVAL; 880 rc = -EINVAL;
905 goto out; 881 goto out;
906 } 882 }
907 883 (*packet_size) += 12; /* Ignore filename and modification date */
908 /* skip the next 12 bytes */
909 (*packet_size) += 12; /* We don't care about the filename or
910 * the timestamp */
911
912 /* get the Tag 11 contents - tag_11_contents_size bytes */
913 memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size)); 884 memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
914 (*packet_size) += (*tag_11_contents_size); 885 (*packet_size) += (*tag_11_contents_size);
915
916out: 886out:
917 if (rc) { 887 if (rc) {
918 (*packet_size) = 0; 888 (*packet_size) = 0;
@@ -921,130 +891,229 @@ out:
921 return rc; 891 return rc;
922} 892}
923 893
894static int
895ecryptfs_find_global_auth_tok_for_sig(
896 struct ecryptfs_global_auth_tok **global_auth_tok,
897 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
898{
899 struct ecryptfs_global_auth_tok *walker;
900 int rc = 0;
901
902 (*global_auth_tok) = NULL;
903 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
904 list_for_each_entry(walker,
905 &mount_crypt_stat->global_auth_tok_list,
906 mount_crypt_stat_list) {
907 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
908 (*global_auth_tok) = walker;
909 goto out;
910 }
911 }
912 rc = -EINVAL;
913out:
914 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
915 return rc;
916}
917
924/** 918/**
925 * decrypt_session_key - Decrypt the session key with the given auth_tok. 919 * ecryptfs_verify_version
920 * @version: The version number to confirm
926 * 921 *
927 * Returns Zero on success; non-zero error otherwise. 922 * Returns zero on good version; non-zero otherwise
928 */ 923 */
929static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, 924static int ecryptfs_verify_version(u16 version)
930 struct ecryptfs_crypt_stat *crypt_stat)
931{ 925{
932 struct ecryptfs_password *password_s_ptr; 926 int rc = 0;
933 struct scatterlist src_sg[2], dst_sg[2]; 927 unsigned char major;
934 struct mutex *tfm_mutex = NULL; 928 unsigned char minor;
935 char *encrypted_session_key; 929
936 char *session_key; 930 major = ((version >> 8) & 0xFF);
931 minor = (version & 0xFF);
932 if (major != ECRYPTFS_VERSION_MAJOR) {
933 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
934 "Expected [%d]; got [%d]\n",
935 ECRYPTFS_VERSION_MAJOR, major);
936 rc = -EINVAL;
937 goto out;
938 }
939 if (minor != ECRYPTFS_VERSION_MINOR) {
940 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
941 "Expected [%d]; got [%d]\n",
942 ECRYPTFS_VERSION_MINOR, minor);
943 rc = -EINVAL;
944 goto out;
945 }
946out:
947 return rc;
948}
949
950int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
951 struct ecryptfs_auth_tok **auth_tok,
952 char *sig)
953{
954 int rc = 0;
955
956 (*auth_tok_key) = request_key(&key_type_user, sig, NULL);
957 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
958 printk(KERN_ERR "Could not find key with description: [%s]\n",
959 sig);
960 process_request_key_err(PTR_ERR(*auth_tok_key));
961 rc = -EINVAL;
962 goto out;
963 }
964 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
965 if (ecryptfs_verify_version((*auth_tok)->version)) {
966 printk(KERN_ERR
967 "Data structure version mismatch. "
968 "Userspace tools must match eCryptfs "
969 "kernel module with major version [%d] "
970 "and minor version [%d]\n",
971 ECRYPTFS_VERSION_MAJOR,
972 ECRYPTFS_VERSION_MINOR);
973 rc = -EINVAL;
974 goto out;
975 }
976 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
977 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
978 printk(KERN_ERR "Invalid auth_tok structure "
979 "returned from key query\n");
980 rc = -EINVAL;
981 goto out;
982 }
983out:
984 return rc;
985}
986
987/**
988 * ecryptfs_find_auth_tok_for_sig
989 * @auth_tok: Set to the matching auth_tok; NULL if not found
990 * @crypt_stat: inode crypt_stat crypto context
991 * @sig: Sig of auth_tok to find
992 *
993 * For now, this function simply looks at the registered auth_tok's
994 * linked off the mount_crypt_stat, so all the auth_toks that can be
995 * used must be registered at mount time. This function could
996 * potentially try a lot harder to find auth_tok's (e.g., by calling
997 * out to ecryptfsd to dynamically retrieve an auth_tok object) so
998 * that static registration of auth_tok's will no longer be necessary.
999 *
1000 * Returns zero on no error; non-zero on error
1001 */
1002static int
1003ecryptfs_find_auth_tok_for_sig(
1004 struct ecryptfs_auth_tok **auth_tok,
1005 struct ecryptfs_crypt_stat *crypt_stat, char *sig)
1006{
1007 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1008 crypt_stat->mount_crypt_stat;
1009 struct ecryptfs_global_auth_tok *global_auth_tok;
1010 int rc = 0;
1011
1012 (*auth_tok) = NULL;
1013 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
1014 mount_crypt_stat, sig)) {
1015 struct key *auth_tok_key;
1016
1017 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
1018 sig);
1019 } else
1020 (*auth_tok) = global_auth_tok->global_auth_tok;
1021 return rc;
1022}
1023
1024/**
1025 * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok.
1026 * @auth_tok: The passphrase authentication token to use to encrypt the FEK
1027 * @crypt_stat: The cryptographic context
1028 *
1029 * Returns zero on success; non-zero error otherwise
1030 */
1031static int
1032decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1033 struct ecryptfs_crypt_stat *crypt_stat)
1034{
1035 struct scatterlist dst_sg;
1036 struct scatterlist src_sg;
1037 struct mutex *tfm_mutex;
937 struct blkcipher_desc desc = { 1038 struct blkcipher_desc desc = {
938 .flags = CRYPTO_TFM_REQ_MAY_SLEEP 1039 .flags = CRYPTO_TFM_REQ_MAY_SLEEP
939 }; 1040 };
940 int rc = 0; 1041 int rc = 0;
941 1042
942 password_s_ptr = &auth_tok->token.password; 1043 if (unlikely(ecryptfs_verbosity > 0)) {
943 if (password_s_ptr->flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) 1044 ecryptfs_printk(
944 ecryptfs_printk(KERN_DEBUG, "Session key encryption key " 1045 KERN_DEBUG, "Session key encryption key (size [%d]):\n",
945 "set; skipping key generation\n"); 1046 auth_tok->token.password.session_key_encryption_key_bytes);
946 ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])" 1047 ecryptfs_dump_hex(
947 ":\n", 1048 auth_tok->token.password.session_key_encryption_key,
948 password_s_ptr->session_key_encryption_key_bytes); 1049 auth_tok->token.password.session_key_encryption_key_bytes);
949 if (ecryptfs_verbosity > 0) 1050 }
950 ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key, 1051 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
951 password_s_ptr-> 1052 crypt_stat->cipher);
952 session_key_encryption_key_bytes); 1053 if (unlikely(rc)) {
953 if (!strcmp(crypt_stat->cipher, 1054 printk(KERN_ERR "Internal error whilst attempting to get "
954 crypt_stat->mount_crypt_stat->global_default_cipher_name) 1055 "tfm and mutex for cipher name [%s]; rc = [%d]\n",
955 && crypt_stat->mount_crypt_stat->global_key_tfm) { 1056 crypt_stat->cipher, rc);
956 desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm; 1057 goto out;
957 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
958 } else {
959 char *full_alg_name;
960
961 rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
962 crypt_stat->cipher,
963 "ecb");
964 if (rc)
965 goto out;
966 desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
967 CRYPTO_ALG_ASYNC);
968 kfree(full_alg_name);
969 if (IS_ERR(desc.tfm)) {
970 rc = PTR_ERR(desc.tfm);
971 printk(KERN_ERR "Error allocating crypto context; "
972 "rc = [%d]\n", rc);
973 goto out;
974 }
975 crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
976 } 1058 }
977 if (tfm_mutex) 1059 rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key,
978 mutex_lock(tfm_mutex); 1060 auth_tok->session_key.encrypted_key_size,
979 rc = crypto_blkcipher_setkey(desc.tfm, 1061 &src_sg, 1);
980 password_s_ptr->session_key_encryption_key, 1062 if (rc != 1) {
981 crypt_stat->key_size); 1063 printk(KERN_ERR "Internal error whilst attempting to convert "
982 if (rc < 0) { 1064 "auth_tok->session_key.encrypted_key to scatterlist; "
1065 "expected rc = 1; got rc = [%d]. "
1066 "auth_tok->session_key.encrypted_key_size = [%d]\n", rc,
1067 auth_tok->session_key.encrypted_key_size);
1068 goto out;
1069 }
1070 auth_tok->session_key.decrypted_key_size =
1071 auth_tok->session_key.encrypted_key_size;
1072 rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key,
1073 auth_tok->session_key.decrypted_key_size,
1074 &dst_sg, 1);
1075 if (rc != 1) {
1076 printk(KERN_ERR "Internal error whilst attempting to convert "
1077 "auth_tok->session_key.decrypted_key to scatterlist; "
1078 "expected rc = 1; got rc = [%d]\n", rc);
1079 goto out;
1080 }
1081 mutex_lock(tfm_mutex);
1082 rc = crypto_blkcipher_setkey(
1083 desc.tfm, auth_tok->token.password.session_key_encryption_key,
1084 crypt_stat->key_size);
1085 if (unlikely(rc < 0)) {
1086 mutex_unlock(tfm_mutex);
983 printk(KERN_ERR "Error setting key for crypto context\n"); 1087 printk(KERN_ERR "Error setting key for crypto context\n");
984 rc = -EINVAL; 1088 rc = -EINVAL;
985 goto out_free_tfm; 1089 goto out;
986 }
987 /* TODO: virt_to_scatterlist */
988 encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
989 if (!encrypted_session_key) {
990 ecryptfs_printk(KERN_ERR, "Out of memory\n");
991 rc = -ENOMEM;
992 goto out_free_tfm;
993 } 1090 }
994 session_key = (char *)__get_free_page(GFP_KERNEL); 1091 rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg,
995 if (!session_key) {
996 kfree(encrypted_session_key);
997 ecryptfs_printk(KERN_ERR, "Out of memory\n");
998 rc = -ENOMEM;
999 goto out_free_tfm;
1000 }
1001 memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
1002 auth_tok->session_key.encrypted_key_size);
1003 src_sg[0].page = virt_to_page(encrypted_session_key);
1004 src_sg[0].offset = 0;
1005 BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
1006 src_sg[0].length = auth_tok->session_key.encrypted_key_size;
1007 dst_sg[0].page = virt_to_page(session_key);
1008 dst_sg[0].offset = 0;
1009 auth_tok->session_key.decrypted_key_size =
1010 auth_tok->session_key.encrypted_key_size;
1011 dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
1012 rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
1013 auth_tok->session_key.encrypted_key_size); 1092 auth_tok->session_key.encrypted_key_size);
1014 if (rc) { 1093 mutex_unlock(tfm_mutex);
1094 if (unlikely(rc)) {
1015 printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc); 1095 printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
1016 goto out_free_memory; 1096 goto out;
1017 } 1097 }
1018 auth_tok->session_key.decrypted_key_size =
1019 auth_tok->session_key.encrypted_key_size;
1020 memcpy(auth_tok->session_key.decrypted_key, session_key,
1021 auth_tok->session_key.decrypted_key_size);
1022 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; 1098 auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
1023 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, 1099 memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
1024 auth_tok->session_key.decrypted_key_size); 1100 auth_tok->session_key.decrypted_key_size);
1025 crypt_stat->flags |= ECRYPTFS_KEY_VALID; 1101 crypt_stat->flags |= ECRYPTFS_KEY_VALID;
1026 ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); 1102 if (unlikely(ecryptfs_verbosity > 0)) {
1027 if (ecryptfs_verbosity > 0) 1103 ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n",
1104 crypt_stat->key_size);
1028 ecryptfs_dump_hex(crypt_stat->key, 1105 ecryptfs_dump_hex(crypt_stat->key,
1029 crypt_stat->key_size); 1106 crypt_stat->key_size);
1030out_free_memory: 1107 }
1031 memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
1032 free_page((unsigned long)encrypted_session_key);
1033 memset(session_key, 0, PAGE_CACHE_SIZE);
1034 free_page((unsigned long)session_key);
1035out_free_tfm:
1036 if (tfm_mutex)
1037 mutex_unlock(tfm_mutex);
1038 else
1039 crypto_free_blkcipher(desc.tfm);
1040out: 1108out:
1041 return rc; 1109 return rc;
1042} 1110}
1043 1111
1044/** 1112/**
1045 * ecryptfs_parse_packet_set 1113 * ecryptfs_parse_packet_set
1046 * @dest: The header page in memory 1114 * @crypt_stat: The cryptographic context
1047 * @version: Version of file format, to guide parsing behavior 1115 * @src: Virtual address of region of memory containing the packets
1116 * @ecryptfs_dentry: The eCryptfs dentry associated with the packet set
1048 * 1117 *
1049 * Get crypt_stat to have the file's session key if the requisite key 1118 * Get crypt_stat to have the file's session key if the requisite key
1050 * is available to decrypt the session key. 1119 * is available to decrypt the session key.
@@ -1058,25 +1127,22 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1058 struct dentry *ecryptfs_dentry) 1127 struct dentry *ecryptfs_dentry)
1059{ 1128{
1060 size_t i = 0; 1129 size_t i = 0;
1061 size_t found_auth_tok = 0; 1130 size_t found_auth_tok;
1062 size_t next_packet_is_auth_tok_packet; 1131 size_t next_packet_is_auth_tok_packet;
1063 char sig[ECRYPTFS_SIG_SIZE_HEX];
1064 struct list_head auth_tok_list; 1132 struct list_head auth_tok_list;
1065 struct list_head *walker; 1133 struct ecryptfs_auth_tok *matching_auth_tok;
1066 struct ecryptfs_auth_tok *chosen_auth_tok = NULL; 1134 struct ecryptfs_auth_tok *candidate_auth_tok;
1067 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 1135 char *candidate_auth_tok_sig;
1068 &ecryptfs_superblock_to_private(
1069 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1070 struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
1071 size_t packet_size; 1136 size_t packet_size;
1072 struct ecryptfs_auth_tok *new_auth_tok; 1137 struct ecryptfs_auth_tok *new_auth_tok;
1073 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE]; 1138 unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
1139 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
1074 size_t tag_11_contents_size; 1140 size_t tag_11_contents_size;
1075 size_t tag_11_packet_size; 1141 size_t tag_11_packet_size;
1076 int rc = 0; 1142 int rc = 0;
1077 1143
1078 INIT_LIST_HEAD(&auth_tok_list); 1144 INIT_LIST_HEAD(&auth_tok_list);
1079 /* Parse the header to find as many packets as we can, these will be 1145 /* Parse the header to find as many packets as we can; these will be
1080 * added the our &auth_tok_list */ 1146 * added the our &auth_tok_list */
1081 next_packet_is_auth_tok_packet = 1; 1147 next_packet_is_auth_tok_packet = 1;
1082 while (next_packet_is_auth_tok_packet) { 1148 while (next_packet_is_auth_tok_packet) {
@@ -1155,73 +1221,85 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1155 } 1221 }
1156 } 1222 }
1157 if (list_empty(&auth_tok_list)) { 1223 if (list_empty(&auth_tok_list)) {
1158 rc = -EINVAL; /* Do not support non-encrypted files in 1224 printk(KERN_ERR "The lower file appears to be a non-encrypted "
1159 * the 0.1 release */ 1225 "eCryptfs file; this is not supported in this version "
1226 "of the eCryptfs kernel module\n");
1227 rc = -EINVAL;
1160 goto out; 1228 goto out;
1161 } 1229 }
1162 /* If we have a global auth tok, then we should try to use 1230 /* auth_tok_list contains the set of authentication tokens
1163 * it */ 1231 * parsed from the metadata. We need to find a matching
1164 if (mount_crypt_stat->global_auth_tok) { 1232 * authentication token that has the secret component(s)
1165 memcpy(sig, mount_crypt_stat->global_auth_tok_sig, 1233 * necessary to decrypt the EFEK in the auth_tok parsed from
1166 ECRYPTFS_SIG_SIZE_HEX); 1234 * the metadata. There may be several potential matches, but
1167 chosen_auth_tok = mount_crypt_stat->global_auth_tok; 1235 * just one will be sufficient to decrypt to get the FEK. */
1168 } else 1236find_next_matching_auth_tok:
1169 BUG(); /* We should always have a global auth tok in 1237 found_auth_tok = 0;
1170 * the 0.1 release */ 1238 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
1171 /* Scan list to see if our chosen_auth_tok works */
1172 list_for_each(walker, &auth_tok_list) {
1173 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
1174 auth_tok_list_item =
1175 list_entry(walker, struct ecryptfs_auth_tok_list_item,
1176 list);
1177 candidate_auth_tok = &auth_tok_list_item->auth_tok; 1239 candidate_auth_tok = &auth_tok_list_item->auth_tok;
1178 if (unlikely(ecryptfs_verbosity > 0)) { 1240 if (unlikely(ecryptfs_verbosity > 0)) {
1179 ecryptfs_printk(KERN_DEBUG, 1241 ecryptfs_printk(KERN_DEBUG,
1180 "Considering cadidate auth tok:\n"); 1242 "Considering cadidate auth tok:\n");
1181 ecryptfs_dump_auth_tok(candidate_auth_tok); 1243 ecryptfs_dump_auth_tok(candidate_auth_tok);
1182 } 1244 }
1183 /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */ 1245 rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig,
1184 if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD 1246 candidate_auth_tok);
1185 && !strncmp(candidate_auth_tok->token.password.signature, 1247 if (rc) {
1186 sig, ECRYPTFS_SIG_SIZE_HEX)) { 1248 printk(KERN_ERR
1187 found_auth_tok = 1; 1249 "Unrecognized candidate auth tok type: [%d]\n",
1188 goto leave_list; 1250 candidate_auth_tok->token_type);
1189 /* TODO: Transfer the common salt into the 1251 rc = -EINVAL;
1190 * crypt_stat salt */ 1252 goto out_wipe_list;
1191 } else if ((candidate_auth_tok->token_type 1253 }
1192 == ECRYPTFS_PRIVATE_KEY) 1254 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat,
1193 && !strncmp(candidate_auth_tok->token.private_key.signature, 1255 candidate_auth_tok_sig);
1194 sig, ECRYPTFS_SIG_SIZE_HEX)) { 1256 if (matching_auth_tok) {
1195 found_auth_tok = 1; 1257 found_auth_tok = 1;
1196 goto leave_list; 1258 goto found_matching_auth_tok;
1197 } 1259 }
1198 } 1260 }
1199 if (!found_auth_tok) { 1261 if (!found_auth_tok) {
1200 ecryptfs_printk(KERN_ERR, "Could not find authentication " 1262 ecryptfs_printk(KERN_ERR, "Could not find a usable "
1201 "token on temporary list for sig [%.*s]\n", 1263 "authentication token\n");
1202 ECRYPTFS_SIG_SIZE_HEX, sig);
1203 rc = -EIO; 1264 rc = -EIO;
1204 goto out_wipe_list; 1265 goto out_wipe_list;
1205 } 1266 }
1206leave_list: 1267found_matching_auth_tok:
1207 rc = -ENOTSUPP;
1208 if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { 1268 if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
1209 memcpy(&(candidate_auth_tok->token.private_key), 1269 memcpy(&(candidate_auth_tok->token.private_key),
1210 &(chosen_auth_tok->token.private_key), 1270 &(matching_auth_tok->token.private_key),
1211 sizeof(struct ecryptfs_private_key)); 1271 sizeof(struct ecryptfs_private_key));
1212 rc = decrypt_pki_encrypted_session_key(mount_crypt_stat, 1272 rc = decrypt_pki_encrypted_session_key(candidate_auth_tok,
1213 candidate_auth_tok,
1214 crypt_stat); 1273 crypt_stat);
1215 } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) { 1274 } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) {
1216 memcpy(&(candidate_auth_tok->token.password), 1275 memcpy(&(candidate_auth_tok->token.password),
1217 &(chosen_auth_tok->token.password), 1276 &(matching_auth_tok->token.password),
1218 sizeof(struct ecryptfs_password)); 1277 sizeof(struct ecryptfs_password));
1219 rc = decrypt_session_key(candidate_auth_tok, crypt_stat); 1278 rc = decrypt_passphrase_encrypted_session_key(
1279 candidate_auth_tok, crypt_stat);
1220 } 1280 }
1221 if (rc) { 1281 if (rc) {
1222 ecryptfs_printk(KERN_ERR, "Error decrypting the " 1282 struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp;
1223 "session key; rc = [%d]\n", rc); 1283
1224 goto out_wipe_list; 1284 ecryptfs_printk(KERN_WARNING, "Error decrypting the "
1285 "session key for authentication token with sig "
1286 "[%.*s]; rc = [%d]. Removing auth tok "
1287 "candidate from the list and searching for "
1288 "the next match.\n", candidate_auth_tok_sig,
1289 ECRYPTFS_SIG_SIZE_HEX, rc);
1290 list_for_each_entry_safe(auth_tok_list_item,
1291 auth_tok_list_item_tmp,
1292 &auth_tok_list, list) {
1293 if (candidate_auth_tok
1294 == &auth_tok_list_item->auth_tok) {
1295 list_del(&auth_tok_list_item->list);
1296 kmem_cache_free(
1297 ecryptfs_auth_tok_list_item_cache,
1298 auth_tok_list_item);
1299 goto find_next_matching_auth_tok;
1300 }
1301 }
1302 BUG();
1225 } 1303 }
1226 rc = ecryptfs_compute_root_iv(crypt_stat); 1304 rc = ecryptfs_compute_root_iv(crypt_stat);
1227 if (rc) { 1305 if (rc) {
@@ -1240,6 +1318,7 @@ out_wipe_list:
1240out: 1318out:
1241 return rc; 1319 return rc;
1242} 1320}
1321
1243static int 1322static int
1244pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, 1323pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
1245 struct ecryptfs_crypt_stat *crypt_stat, 1324 struct ecryptfs_crypt_stat *crypt_stat,
@@ -1284,22 +1363,25 @@ out:
1284/** 1363/**
1285 * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet 1364 * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet
1286 * @dest: Buffer into which to write the packet 1365 * @dest: Buffer into which to write the packet
1287 * @max: Maximum number of bytes that can be writtn 1366 * @remaining_bytes: Maximum number of bytes that can be writtn
1367 * @auth_tok: The authentication token used for generating the tag 1 packet
1368 * @crypt_stat: The cryptographic context
1369 * @key_rec: The key record struct for the tag 1 packet
1288 * @packet_size: This function will write the number of bytes that end 1370 * @packet_size: This function will write the number of bytes that end
1289 * up constituting the packet; set to zero on error 1371 * up constituting the packet; set to zero on error
1290 * 1372 *
1291 * Returns zero on success; non-zero on error. 1373 * Returns zero on success; non-zero on error.
1292 */ 1374 */
1293static int 1375static int
1294write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, 1376write_tag_1_packet(char *dest, size_t *remaining_bytes,
1377 struct ecryptfs_auth_tok *auth_tok,
1295 struct ecryptfs_crypt_stat *crypt_stat, 1378 struct ecryptfs_crypt_stat *crypt_stat,
1296 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
1297 struct ecryptfs_key_record *key_rec, size_t *packet_size) 1379 struct ecryptfs_key_record *key_rec, size_t *packet_size)
1298{ 1380{
1299 size_t i; 1381 size_t i;
1300 size_t encrypted_session_key_valid = 0; 1382 size_t encrypted_session_key_valid = 0;
1301 size_t key_rec_size;
1302 size_t packet_size_length; 1383 size_t packet_size_length;
1384 size_t max_packet_size;
1303 int rc = 0; 1385 int rc = 0;
1304 1386
1305 (*packet_size) = 0; 1387 (*packet_size) = 0;
@@ -1329,37 +1411,23 @@ write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1329 ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size); 1411 ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size);
1330 } 1412 }
1331encrypted_session_key_set: 1413encrypted_session_key_set:
1332 /* Now we have a valid key_rec. Append it to the 1414 /* This format is inspired by OpenPGP; see RFC 2440
1333 * key_rec set. */ 1415 * packet tag 1 */
1334 key_rec_size = (sizeof(struct ecryptfs_key_record) 1416 max_packet_size = (1 /* Tag 1 identifier */
1335 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 1417 + 3 /* Max Tag 1 packet size */
1336 + (key_rec->enc_key_size)); 1418 + 1 /* Version */
1337 /* TODO: Include a packet size limit as a parameter to this 1419 + ECRYPTFS_SIG_SIZE /* Key identifier */
1338 * function once we have multi-packet headers (for versions 1420 + 1 /* Cipher identifier */
1339 * later than 0.1 */ 1421 + key_rec->enc_key_size); /* Encrypted key size */
1340 if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) { 1422 if (max_packet_size > (*remaining_bytes)) {
1341 ecryptfs_printk(KERN_ERR, "Keyset too large\n"); 1423 printk(KERN_ERR "Packet length larger than maximum allowable; "
1342 rc = -EINVAL; 1424 "need up to [%td] bytes, but there are only [%td] "
1343 goto out; 1425 "available\n", max_packet_size, (*remaining_bytes));
1344 }
1345 /* ***** TAG 1 Packet Format *****
1346 * | version number | 1 byte |
1347 * | key ID | 8 bytes |
1348 * | public key algorithm | 1 byte |
1349 * | encrypted session key | arbitrary |
1350 */
1351 if ((0x02 + ECRYPTFS_SIG_SIZE + key_rec->enc_key_size) >= max) {
1352 ecryptfs_printk(KERN_ERR,
1353 "Authentication token is too large\n");
1354 rc = -EINVAL; 1426 rc = -EINVAL;
1355 goto out; 1427 goto out;
1356 } 1428 }
1357 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1429 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1358 /* This format is inspired by OpenPGP; see RFC 2440 1430 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4),
1359 * packet tag 1 */
1360 rc = write_packet_length(&dest[(*packet_size)],
1361 (0x02 + ECRYPTFS_SIG_SIZE +
1362 key_rec->enc_key_size),
1363 &packet_size_length); 1431 &packet_size_length);
1364 if (rc) { 1432 if (rc) {
1365 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1433 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
@@ -1377,13 +1445,15 @@ encrypted_session_key_set:
1377out: 1445out:
1378 if (rc) 1446 if (rc)
1379 (*packet_size) = 0; 1447 (*packet_size) = 0;
1448 else
1449 (*remaining_bytes) -= (*packet_size);
1380 return rc; 1450 return rc;
1381} 1451}
1382 1452
1383/** 1453/**
1384 * write_tag_11_packet 1454 * write_tag_11_packet
1385 * @dest: Target into which Tag 11 packet is to be written 1455 * @dest: Target into which Tag 11 packet is to be written
1386 * @max: Maximum packet length 1456 * @remaining_bytes: Maximum packet length
1387 * @contents: Byte array of contents to copy in 1457 * @contents: Byte array of contents to copy in
1388 * @contents_length: Number of bytes in contents 1458 * @contents_length: Number of bytes in contents
1389 * @packet_length: Length of the Tag 11 packet written; zero on error 1459 * @packet_length: Length of the Tag 11 packet written; zero on error
@@ -1391,54 +1461,59 @@ out:
1391 * Returns zero on success; non-zero on error. 1461 * Returns zero on success; non-zero on error.
1392 */ 1462 */
1393static int 1463static int
1394write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, 1464write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1395 size_t *packet_length) 1465 size_t contents_length, size_t *packet_length)
1396{ 1466{
1397 size_t packet_size_length; 1467 size_t packet_size_length;
1468 size_t max_packet_size;
1398 int rc = 0; 1469 int rc = 0;
1399 1470
1400 (*packet_length) = 0; 1471 (*packet_length) = 0;
1401 if ((13 + contents_length) > max) { 1472 /* This format is inspired by OpenPGP; see RFC 2440
1473 * packet tag 11 */
1474 max_packet_size = (1 /* Tag 11 identifier */
1475 + 3 /* Max Tag 11 packet size */
1476 + 1 /* Binary format specifier */
1477 + 1 /* Filename length */
1478 + 8 /* Filename ("_CONSOLE") */
1479 + 4 /* Modification date */
1480 + contents_length); /* Literal data */
1481 if (max_packet_size > (*remaining_bytes)) {
1482 printk(KERN_ERR "Packet length larger than maximum allowable; "
1483 "need up to [%td] bytes, but there are only [%td] "
1484 "available\n", max_packet_size, (*remaining_bytes));
1402 rc = -EINVAL; 1485 rc = -EINVAL;
1403 ecryptfs_printk(KERN_ERR, "Packet length larger than "
1404 "maximum allowable\n");
1405 goto out; 1486 goto out;
1406 } 1487 }
1407 /* General packet header */
1408 /* Packet tag */
1409 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1488 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1410 /* Packet length */
1411 rc = write_packet_length(&dest[(*packet_length)], 1489 rc = write_packet_length(&dest[(*packet_length)],
1412 (13 + contents_length), &packet_size_length); 1490 (max_packet_size - 4), &packet_size_length);
1413 if (rc) { 1491 if (rc) {
1414 ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet " 1492 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1415 "header; cannot generate packet length\n"); 1493 "generate packet length. rc = [%d]\n", rc);
1416 goto out; 1494 goto out;
1417 } 1495 }
1418 (*packet_length) += packet_size_length; 1496 (*packet_length) += packet_size_length;
1419 /* Tag 11 specific */ 1497 dest[(*packet_length)++] = 0x62; /* binary data format specifier */
1420 /* One-octet field that describes how the data is formatted */
1421 dest[(*packet_length)++] = 0x62; /* binary data */
1422 /* One-octet filename length followed by filename */
1423 dest[(*packet_length)++] = 8; 1498 dest[(*packet_length)++] = 8;
1424 memcpy(&dest[(*packet_length)], "_CONSOLE", 8); 1499 memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
1425 (*packet_length) += 8; 1500 (*packet_length) += 8;
1426 /* Four-octet number indicating modification date */
1427 memset(&dest[(*packet_length)], 0x00, 4); 1501 memset(&dest[(*packet_length)], 0x00, 4);
1428 (*packet_length) += 4; 1502 (*packet_length) += 4;
1429 /* Remainder is literal data */
1430 memcpy(&dest[(*packet_length)], contents, contents_length); 1503 memcpy(&dest[(*packet_length)], contents, contents_length);
1431 (*packet_length) += contents_length; 1504 (*packet_length) += contents_length;
1432 out: 1505 out:
1433 if (rc) 1506 if (rc)
1434 (*packet_length) = 0; 1507 (*packet_length) = 0;
1508 else
1509 (*remaining_bytes) -= (*packet_length);
1435 return rc; 1510 return rc;
1436} 1511}
1437 1512
1438/** 1513/**
1439 * write_tag_3_packet 1514 * write_tag_3_packet
1440 * @dest: Buffer into which to write the packet 1515 * @dest: Buffer into which to write the packet
1441 * @max: Maximum number of bytes that can be written 1516 * @remaining_bytes: Maximum number of bytes that can be written
1442 * @auth_tok: Authentication token 1517 * @auth_tok: Authentication token
1443 * @crypt_stat: The cryptographic context 1518 * @crypt_stat: The cryptographic context
1444 * @key_rec: encrypted key 1519 * @key_rec: encrypted key
@@ -1448,19 +1523,22 @@ write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
1448 * Returns zero on success; non-zero on error. 1523 * Returns zero on success; non-zero on error.
1449 */ 1524 */
1450static int 1525static int
1451write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, 1526write_tag_3_packet(char *dest, size_t *remaining_bytes,
1527 struct ecryptfs_auth_tok *auth_tok,
1452 struct ecryptfs_crypt_stat *crypt_stat, 1528 struct ecryptfs_crypt_stat *crypt_stat,
1453 struct ecryptfs_key_record *key_rec, size_t *packet_size) 1529 struct ecryptfs_key_record *key_rec, size_t *packet_size)
1454{ 1530{
1455 size_t i; 1531 size_t i;
1456 size_t encrypted_session_key_valid = 0; 1532 size_t encrypted_session_key_valid = 0;
1457 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; 1533 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
1458 struct scatterlist dest_sg[2]; 1534 struct scatterlist dst_sg;
1459 struct scatterlist src_sg[2]; 1535 struct scatterlist src_sg;
1460 struct mutex *tfm_mutex = NULL; 1536 struct mutex *tfm_mutex = NULL;
1461 size_t key_rec_size;
1462 size_t packet_size_length;
1463 size_t cipher_code; 1537 size_t cipher_code;
1538 size_t packet_size_length;
1539 size_t max_packet_size;
1540 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1541 crypt_stat->mount_crypt_stat;
1464 struct blkcipher_desc desc = { 1542 struct blkcipher_desc desc = {
1465 .tfm = NULL, 1543 .tfm = NULL,
1466 .flags = CRYPTO_TFM_REQ_MAY_SLEEP 1544 .flags = CRYPTO_TFM_REQ_MAY_SLEEP
@@ -1470,16 +1548,25 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1470 (*packet_size) = 0; 1548 (*packet_size) = 0;
1471 ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature, 1549 ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
1472 ECRYPTFS_SIG_SIZE); 1550 ECRYPTFS_SIG_SIZE);
1473 encrypted_session_key_valid = 0; 1551 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
1474 for (i = 0; i < crypt_stat->key_size; i++) 1552 crypt_stat->cipher);
1475 encrypted_session_key_valid |= 1553 if (unlikely(rc)) {
1476 auth_tok->session_key.encrypted_key[i]; 1554 printk(KERN_ERR "Internal error whilst attempting to get "
1477 if (encrypted_session_key_valid) { 1555 "tfm and mutex for cipher name [%s]; rc = [%d]\n",
1478 memcpy(key_rec->enc_key, 1556 crypt_stat->cipher, rc);
1479 auth_tok->session_key.encrypted_key, 1557 goto out;
1480 auth_tok->session_key.encrypted_key_size); 1558 }
1481 goto encrypted_session_key_set; 1559 if (mount_crypt_stat->global_default_cipher_key_size == 0) {
1560 struct blkcipher_alg *alg = crypto_blkcipher_alg(desc.tfm);
1561
1562 printk(KERN_WARNING "No key size specified at mount; "
1563 "defaulting to [%d]\n", alg->max_keysize);
1564 mount_crypt_stat->global_default_cipher_key_size =
1565 alg->max_keysize;
1482 } 1566 }
1567 if (crypt_stat->key_size == 0)
1568 crypt_stat->key_size =
1569 mount_crypt_stat->global_default_cipher_key_size;
1483 if (auth_tok->session_key.encrypted_key_size == 0) 1570 if (auth_tok->session_key.encrypted_key_size == 0)
1484 auth_tok->session_key.encrypted_key_size = 1571 auth_tok->session_key.encrypted_key_size =
1485 crypt_stat->key_size; 1572 crypt_stat->key_size;
@@ -1487,9 +1574,24 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1487 && strcmp("aes", crypt_stat->cipher) == 0) { 1574 && strcmp("aes", crypt_stat->cipher) == 0) {
1488 memset((crypt_stat->key + 24), 0, 8); 1575 memset((crypt_stat->key + 24), 0, 8);
1489 auth_tok->session_key.encrypted_key_size = 32; 1576 auth_tok->session_key.encrypted_key_size = 32;
1490 } 1577 } else
1578 auth_tok->session_key.encrypted_key_size = crypt_stat->key_size;
1491 key_rec->enc_key_size = 1579 key_rec->enc_key_size =
1492 auth_tok->session_key.encrypted_key_size; 1580 auth_tok->session_key.encrypted_key_size;
1581 encrypted_session_key_valid = 0;
1582 for (i = 0; i < auth_tok->session_key.encrypted_key_size; i++)
1583 encrypted_session_key_valid |=
1584 auth_tok->session_key.encrypted_key[i];
1585 if (encrypted_session_key_valid) {
1586 ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; "
1587 "using auth_tok->session_key.encrypted_key, "
1588 "where key_rec->enc_key_size = [%d]\n",
1589 key_rec->enc_key_size);
1590 memcpy(key_rec->enc_key,
1591 auth_tok->session_key.encrypted_key,
1592 key_rec->enc_key_size);
1593 goto encrypted_session_key_set;
1594 }
1493 if (auth_tok->token.password.flags & 1595 if (auth_tok->token.password.flags &
1494 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) { 1596 ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) {
1495 ecryptfs_printk(KERN_DEBUG, "Using previously generated " 1597 ecryptfs_printk(KERN_DEBUG, "Using previously generated "
@@ -1508,54 +1610,32 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1508 ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n"); 1610 ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
1509 ecryptfs_dump_hex(session_key_encryption_key, 16); 1611 ecryptfs_dump_hex(session_key_encryption_key, 16);
1510 } 1612 }
1511 rc = virt_to_scatterlist(crypt_stat->key, 1613 rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size,
1512 key_rec->enc_key_size, src_sg, 2); 1614 &src_sg, 1);
1513 if (!rc) { 1615 if (rc != 1) {
1514 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1616 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
1515 "for crypt_stat session key\n"); 1617 "for crypt_stat session key; expected rc = 1; "
1618 "got rc = [%d]. key_rec->enc_key_size = [%d]\n",
1619 rc, key_rec->enc_key_size);
1516 rc = -ENOMEM; 1620 rc = -ENOMEM;
1517 goto out; 1621 goto out;
1518 } 1622 }
1519 rc = virt_to_scatterlist(key_rec->enc_key, 1623 rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size,
1520 key_rec->enc_key_size, dest_sg, 2); 1624 &dst_sg, 1);
1521 if (!rc) { 1625 if (rc != 1) {
1522 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1626 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
1523 "for crypt_stat encrypted session key\n"); 1627 "for crypt_stat encrypted session key; "
1628 "expected rc = 1; got rc = [%d]. "
1629 "key_rec->enc_key_size = [%d]\n", rc,
1630 key_rec->enc_key_size);
1524 rc = -ENOMEM; 1631 rc = -ENOMEM;
1525 goto out; 1632 goto out;
1526 } 1633 }
1527 if (!strcmp(crypt_stat->cipher, 1634 mutex_lock(tfm_mutex);
1528 crypt_stat->mount_crypt_stat->global_default_cipher_name)
1529 && crypt_stat->mount_crypt_stat->global_key_tfm) {
1530 desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
1531 tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
1532 } else {
1533 char *full_alg_name;
1534
1535 rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
1536 crypt_stat->cipher,
1537 "ecb");
1538 if (rc)
1539 goto out;
1540 desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
1541 CRYPTO_ALG_ASYNC);
1542 kfree(full_alg_name);
1543 if (IS_ERR(desc.tfm)) {
1544 rc = PTR_ERR(desc.tfm);
1545 ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
1546 "context for cipher [%s]; rc = [%d]\n",
1547 crypt_stat->cipher, rc);
1548 goto out;
1549 }
1550 crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
1551 }
1552 if (tfm_mutex)
1553 mutex_lock(tfm_mutex);
1554 rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, 1635 rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
1555 crypt_stat->key_size); 1636 crypt_stat->key_size);
1556 if (rc < 0) { 1637 if (rc < 0) {
1557 if (tfm_mutex) 1638 mutex_unlock(tfm_mutex);
1558 mutex_unlock(tfm_mutex);
1559 ecryptfs_printk(KERN_ERR, "Error setting key for crypto " 1639 ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
1560 "context; rc = [%d]\n", rc); 1640 "context; rc = [%d]\n", rc);
1561 goto out; 1641 goto out;
@@ -1563,56 +1643,53 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
1563 rc = 0; 1643 rc = 0;
1564 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", 1644 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
1565 crypt_stat->key_size); 1645 crypt_stat->key_size);
1566 rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg, 1646 rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg,
1567 (*key_rec).enc_key_size); 1647 (*key_rec).enc_key_size);
1648 mutex_unlock(tfm_mutex);
1568 if (rc) { 1649 if (rc) {
1569 printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc); 1650 printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
1570 goto out; 1651 goto out;
1571 } 1652 }
1572 if (tfm_mutex)
1573 mutex_unlock(tfm_mutex);
1574 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); 1653 ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
1575 if (ecryptfs_verbosity > 0) 1654 if (ecryptfs_verbosity > 0) {
1655 ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n",
1656 key_rec->enc_key_size);
1576 ecryptfs_dump_hex(key_rec->enc_key, 1657 ecryptfs_dump_hex(key_rec->enc_key,
1577 key_rec->enc_key_size); 1658 key_rec->enc_key_size);
1578encrypted_session_key_set:
1579 /* Now we have a valid key_rec. Append it to the
1580 * key_rec set. */
1581 key_rec_size = (sizeof(struct ecryptfs_key_record)
1582 - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
1583 + (key_rec->enc_key_size));
1584 /* TODO: Include a packet size limit as a parameter to this
1585 * function once we have multi-packet headers (for versions
1586 * later than 0.1 */
1587 if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
1588 ecryptfs_printk(KERN_ERR, "Keyset too large\n");
1589 rc = -EINVAL;
1590 goto out;
1591 } 1659 }
1592 /* TODO: Packet size limit */ 1660encrypted_session_key_set:
1593 /* We have 5 bytes of surrounding packet data */ 1661 /* This format is inspired by OpenPGP; see RFC 2440
1594 if ((0x05 + ECRYPTFS_SALT_SIZE 1662 * packet tag 3 */
1595 + key_rec->enc_key_size) >= max) { 1663 max_packet_size = (1 /* Tag 3 identifier */
1596 ecryptfs_printk(KERN_ERR, "Authentication token is too " 1664 + 3 /* Max Tag 3 packet size */
1597 "large\n"); 1665 + 1 /* Version */
1666 + 1 /* Cipher code */
1667 + 1 /* S2K specifier */
1668 + 1 /* Hash identifier */
1669 + ECRYPTFS_SALT_SIZE /* Salt */
1670 + 1 /* Hash iterations */
1671 + key_rec->enc_key_size); /* Encrypted key size */
1672 if (max_packet_size > (*remaining_bytes)) {
1673 printk(KERN_ERR "Packet too large; need up to [%td] bytes, but "
1674 "there are only [%td] available\n", max_packet_size,
1675 (*remaining_bytes));
1598 rc = -EINVAL; 1676 rc = -EINVAL;
1599 goto out; 1677 goto out;
1600 } 1678 }
1601 /* This format is inspired by OpenPGP; see RFC 2440
1602 * packet tag 3 */
1603 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1679 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1604 /* ver+cipher+s2k+hash+salt+iter+enc_key */ 1680 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1605 rc = write_packet_length(&dest[(*packet_size)], 1681 * to get the number of octets in the actual Tag 3 packet */
1606 (0x05 + ECRYPTFS_SALT_SIZE 1682 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4),
1607 + key_rec->enc_key_size),
1608 &packet_size_length); 1683 &packet_size_length);
1609 if (rc) { 1684 if (rc) {
1610 ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet " 1685 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1611 "header; cannot generate packet length\n"); 1686 "generate packet length. rc = [%d]\n", rc);
1612 goto out; 1687 goto out;
1613 } 1688 }
1614 (*packet_size) += packet_size_length; 1689 (*packet_size) += packet_size_length;
1615 dest[(*packet_size)++] = 0x04; /* version 4 */ 1690 dest[(*packet_size)++] = 0x04; /* version 4 */
1691 /* TODO: Break from RFC2440 so that arbitrary ciphers can be
1692 * specified with strings */
1616 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); 1693 cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
1617 if (cipher_code == 0) { 1694 if (cipher_code == 0) {
1618 ecryptfs_printk(KERN_WARNING, "Unable to generate code for " 1695 ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
@@ -1631,10 +1708,10 @@ encrypted_session_key_set:
1631 key_rec->enc_key_size); 1708 key_rec->enc_key_size);
1632 (*packet_size) += key_rec->enc_key_size; 1709 (*packet_size) += key_rec->enc_key_size;
1633out: 1710out:
1634 if (desc.tfm && !tfm_mutex)
1635 crypto_free_blkcipher(desc.tfm);
1636 if (rc) 1711 if (rc)
1637 (*packet_size) = 0; 1712 (*packet_size) = 0;
1713 else
1714 (*remaining_bytes) -= (*packet_size);
1638 return rc; 1715 return rc;
1639} 1716}
1640 1717
@@ -1642,7 +1719,7 @@ struct kmem_cache *ecryptfs_key_record_cache;
1642 1719
1643/** 1720/**
1644 * ecryptfs_generate_key_packet_set 1721 * ecryptfs_generate_key_packet_set
1645 * @dest: Virtual address from which to write the key record set 1722 * @dest_base: Virtual address from which to write the key record set
1646 * @crypt_stat: The cryptographic context from which the 1723 * @crypt_stat: The cryptographic context from which the
1647 * authentication tokens will be retrieved 1724 * authentication tokens will be retrieved
1648 * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat 1725 * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
@@ -1662,24 +1739,43 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1662 size_t max) 1739 size_t max)
1663{ 1740{
1664 struct ecryptfs_auth_tok *auth_tok; 1741 struct ecryptfs_auth_tok *auth_tok;
1742 struct ecryptfs_global_auth_tok *global_auth_tok;
1665 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 1743 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
1666 &ecryptfs_superblock_to_private( 1744 &ecryptfs_superblock_to_private(
1667 ecryptfs_dentry->d_sb)->mount_crypt_stat; 1745 ecryptfs_dentry->d_sb)->mount_crypt_stat;
1668 size_t written; 1746 size_t written;
1669 struct ecryptfs_key_record *key_rec; 1747 struct ecryptfs_key_record *key_rec;
1748 struct ecryptfs_key_sig *key_sig;
1670 int rc = 0; 1749 int rc = 0;
1671 1750
1672 (*len) = 0; 1751 (*len) = 0;
1752 mutex_lock(&crypt_stat->keysig_list_mutex);
1673 key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL); 1753 key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL);
1674 if (!key_rec) { 1754 if (!key_rec) {
1675 rc = -ENOMEM; 1755 rc = -ENOMEM;
1676 goto out; 1756 goto out;
1677 } 1757 }
1678 if (mount_crypt_stat->global_auth_tok) { 1758 list_for_each_entry(key_sig, &crypt_stat->keysig_list,
1679 auth_tok = mount_crypt_stat->global_auth_tok; 1759 crypt_stat_list) {
1760 memset(key_rec, 0, sizeof(*key_rec));
1761 rc = ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
1762 mount_crypt_stat,
1763 key_sig->keysig);
1764 if (rc) {
1765 printk(KERN_ERR "Error attempting to get the global "
1766 "auth_tok; rc = [%d]\n", rc);
1767 goto out_free;
1768 }
1769 if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID) {
1770 printk(KERN_WARNING
1771 "Skipping invalid auth tok with sig = [%s]\n",
1772 global_auth_tok->sig);
1773 continue;
1774 }
1775 auth_tok = global_auth_tok->global_auth_tok;
1680 if (auth_tok->token_type == ECRYPTFS_PASSWORD) { 1776 if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
1681 rc = write_tag_3_packet((dest_base + (*len)), 1777 rc = write_tag_3_packet((dest_base + (*len)),
1682 max, auth_tok, 1778 &max, auth_tok,
1683 crypt_stat, key_rec, 1779 crypt_stat, key_rec,
1684 &written); 1780 &written);
1685 if (rc) { 1781 if (rc) {
@@ -1689,10 +1785,9 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1689 } 1785 }
1690 (*len) += written; 1786 (*len) += written;
1691 /* Write auth tok signature packet */ 1787 /* Write auth tok signature packet */
1692 rc = write_tag_11_packet( 1788 rc = write_tag_11_packet((dest_base + (*len)), &max,
1693 (dest_base + (*len)), 1789 key_rec->sig,
1694 (max - (*len)), 1790 ECRYPTFS_SIG_SIZE, &written);
1695 key_rec->sig, ECRYPTFS_SIG_SIZE, &written);
1696 if (rc) { 1791 if (rc) {
1697 ecryptfs_printk(KERN_ERR, "Error writing " 1792 ecryptfs_printk(KERN_ERR, "Error writing "
1698 "auth tok signature packet\n"); 1793 "auth tok signature packet\n");
@@ -1701,9 +1796,8 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1701 (*len) += written; 1796 (*len) += written;
1702 } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { 1797 } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
1703 rc = write_tag_1_packet(dest_base + (*len), 1798 rc = write_tag_1_packet(dest_base + (*len),
1704 max, auth_tok, 1799 &max, auth_tok,
1705 crypt_stat,mount_crypt_stat, 1800 crypt_stat, key_rec, &written);
1706 key_rec, &written);
1707 if (rc) { 1801 if (rc) {
1708 ecryptfs_printk(KERN_WARNING, "Error " 1802 ecryptfs_printk(KERN_WARNING, "Error "
1709 "writing tag 1 packet\n"); 1803 "writing tag 1 packet\n");
@@ -1716,19 +1810,69 @@ ecryptfs_generate_key_packet_set(char *dest_base,
1716 rc = -EINVAL; 1810 rc = -EINVAL;
1717 goto out_free; 1811 goto out_free;
1718 } 1812 }
1719 } else 1813 }
1720 BUG(); 1814 if (likely(max > 0)) {
1721 if (likely((max - (*len)) > 0)) {
1722 dest_base[(*len)] = 0x00; 1815 dest_base[(*len)] = 0x00;
1723 } else { 1816 } else {
1724 ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); 1817 ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
1725 rc = -EIO; 1818 rc = -EIO;
1726 } 1819 }
1727
1728out_free: 1820out_free:
1729 kmem_cache_free(ecryptfs_key_record_cache, key_rec); 1821 kmem_cache_free(ecryptfs_key_record_cache, key_rec);
1730out: 1822out:
1731 if (rc) 1823 if (rc)
1732 (*len) = 0; 1824 (*len) = 0;
1825 mutex_unlock(&crypt_stat->keysig_list_mutex);
1733 return rc; 1826 return rc;
1734} 1827}
1828
1829struct kmem_cache *ecryptfs_key_sig_cache;
1830
1831int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
1832{
1833 struct ecryptfs_key_sig *new_key_sig;
1834 int rc = 0;
1835
1836 new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL);
1837 if (!new_key_sig) {
1838 rc = -ENOMEM;
1839 printk(KERN_ERR
1840 "Error allocating from ecryptfs_key_sig_cache\n");
1841 goto out;
1842 }
1843 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
1844 mutex_lock(&crypt_stat->keysig_list_mutex);
1845 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
1846 mutex_unlock(&crypt_stat->keysig_list_mutex);
1847out:
1848 return rc;
1849}
1850
1851struct kmem_cache *ecryptfs_global_auth_tok_cache;
1852
1853int
1854ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
1855 char *sig)
1856{
1857 struct ecryptfs_global_auth_tok *new_auth_tok;
1858 int rc = 0;
1859
1860 new_auth_tok = kmem_cache_alloc(ecryptfs_global_auth_tok_cache,
1861 GFP_KERNEL);
1862 if (!new_auth_tok) {
1863 rc = -ENOMEM;
1864 printk(KERN_ERR "Error allocating from "
1865 "ecryptfs_global_auth_tok_cache\n");
1866 goto out;
1867 }
1868 memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
1869 new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
1870 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
1871 list_add(&new_auth_tok->mount_crypt_stat_list,
1872 &mount_crypt_stat->global_auth_tok_list);
1873 mount_crypt_stat->num_global_auth_toks++;
1874 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
1875out:
1876 return rc;
1877}
1878
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a98497264fe8..97e6801f722c 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -99,6 +99,64 @@ void __ecryptfs_printk(const char *fmt, ...)
99} 99}
100 100
101/** 101/**
102 * ecryptfs_init_persistent_file
103 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
104 * the lower dentry and the lower mount set
105 *
106 * eCryptfs only ever keeps a single open file for every lower
107 * inode. All I/O operations to the lower inode occur through that
108 * file. When the first eCryptfs dentry that interposes with the first
109 * lower dentry for that inode is created, this function creates the
110 * persistent file struct and associates it with the eCryptfs
111 * inode. When the eCryptfs inode is destroyed, the file is closed.
112 *
113 * The persistent file will be opened with read/write permissions, if
114 * possible. Otherwise, it is opened read-only.
115 *
116 * This function does nothing if a lower persistent file is already
117 * associated with the eCryptfs inode.
118 *
119 * Returns zero on success; non-zero otherwise
120 */
121int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
122{
123 struct ecryptfs_inode_info *inode_info =
124 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
125 int rc = 0;
126
127 mutex_lock(&inode_info->lower_file_mutex);
128 if (!inode_info->lower_file) {
129 struct dentry *lower_dentry;
130 struct vfsmount *lower_mnt =
131 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
132
133 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
134 /* Corresponding dput() and mntput() are done when the
135 * persistent file is fput() when the eCryptfs inode
136 * is destroyed. */
137 dget(lower_dentry);
138 mntget(lower_mnt);
139 inode_info->lower_file = dentry_open(lower_dentry,
140 lower_mnt,
141 (O_RDWR | O_LARGEFILE));
142 if (IS_ERR(inode_info->lower_file))
143 inode_info->lower_file = dentry_open(lower_dentry,
144 lower_mnt,
145 (O_RDONLY
146 | O_LARGEFILE));
147 if (IS_ERR(inode_info->lower_file)) {
148 printk(KERN_ERR "Error opening lower persistent file "
149 "for lower_dentry [0x%p] and lower_mnt [0x%p]\n",
150 lower_dentry, lower_mnt);
151 rc = PTR_ERR(inode_info->lower_file);
152 inode_info->lower_file = NULL;
153 }
154 }
155 mutex_unlock(&inode_info->lower_file_mutex);
156 return rc;
157}
158
159/**
102 * ecryptfs_interpose 160 * ecryptfs_interpose
103 * @lower_dentry: Existing dentry in the lower filesystem 161 * @lower_dentry: Existing dentry in the lower filesystem
104 * @dentry: ecryptfs' dentry 162 * @dentry: ecryptfs' dentry
@@ -155,6 +213,13 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
155 /* This size will be overwritten for real files w/ headers and 213 /* This size will be overwritten for real files w/ headers and
156 * other metadata */ 214 * other metadata */
157 fsstack_copy_inode_size(inode, lower_inode); 215 fsstack_copy_inode_size(inode, lower_inode);
216 rc = ecryptfs_init_persistent_file(dentry);
217 if (rc) {
218 printk(KERN_ERR "%s: Error attempting to initialize the "
219 "persistent file for the dentry with name [%s]; "
220 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc);
221 goto out;
222 }
158out: 223out:
159 return rc; 224 return rc;
160} 225}
@@ -179,38 +244,41 @@ static match_table_t tokens = {
179 {ecryptfs_opt_err, NULL} 244 {ecryptfs_opt_err, NULL}
180}; 245};
181 246
182/** 247static int ecryptfs_init_global_auth_toks(
183 * ecryptfs_verify_version 248 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
184 * @version: The version number to confirm
185 *
186 * Returns zero on good version; non-zero otherwise
187 */
188static int ecryptfs_verify_version(u16 version)
189{ 249{
250 struct ecryptfs_global_auth_tok *global_auth_tok;
190 int rc = 0; 251 int rc = 0;
191 unsigned char major; 252
192 unsigned char minor; 253 list_for_each_entry(global_auth_tok,
193 254 &mount_crypt_stat->global_auth_tok_list,
194 major = ((version >> 8) & 0xFF); 255 mount_crypt_stat_list) {
195 minor = (version & 0xFF); 256 rc = ecryptfs_keyring_auth_tok_for_sig(
196 if (major != ECRYPTFS_VERSION_MAJOR) { 257 &global_auth_tok->global_auth_tok_key,
197 ecryptfs_printk(KERN_ERR, "Major version number mismatch. " 258 &global_auth_tok->global_auth_tok,
198 "Expected [%d]; got [%d]\n", 259 global_auth_tok->sig);
199 ECRYPTFS_VERSION_MAJOR, major); 260 if (rc) {
200 rc = -EINVAL; 261 printk(KERN_ERR "Could not find valid key in user "
201 goto out; 262 "session keyring for sig specified in mount "
202 } 263 "option: [%s]\n", global_auth_tok->sig);
203 if (minor != ECRYPTFS_VERSION_MINOR) { 264 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
204 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. " 265 rc = 0;
205 "Expected [%d]; got [%d]\n", 266 } else
206 ECRYPTFS_VERSION_MINOR, minor); 267 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
207 rc = -EINVAL;
208 goto out;
209 } 268 }
210out:
211 return rc; 269 return rc;
212} 270}
213 271
272static void ecryptfs_init_mount_crypt_stat(
273 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
274{
275 memset((void *)mount_crypt_stat, 0,
276 sizeof(struct ecryptfs_mount_crypt_stat));
277 INIT_LIST_HEAD(&mount_crypt_stat->global_auth_tok_list);
278 mutex_init(&mount_crypt_stat->global_auth_tok_list_mutex);
279 mount_crypt_stat->flags |= ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED;
280}
281
214/** 282/**
215 * ecryptfs_parse_options 283 * ecryptfs_parse_options
216 * @sb: The ecryptfs super block 284 * @sb: The ecryptfs super block
@@ -238,14 +306,11 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
238 int cipher_name_set = 0; 306 int cipher_name_set = 0;
239 int cipher_key_bytes; 307 int cipher_key_bytes;
240 int cipher_key_bytes_set = 0; 308 int cipher_key_bytes_set = 0;
241 struct key *auth_tok_key = NULL;
242 struct ecryptfs_auth_tok *auth_tok = NULL;
243 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 309 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
244 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 310 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
245 substring_t args[MAX_OPT_ARGS]; 311 substring_t args[MAX_OPT_ARGS];
246 int token; 312 int token;
247 char *sig_src; 313 char *sig_src;
248 char *sig_dst;
249 char *debug_src; 314 char *debug_src;
250 char *cipher_name_dst; 315 char *cipher_name_dst;
251 char *cipher_name_src; 316 char *cipher_name_src;
@@ -256,6 +321,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
256 rc = -EINVAL; 321 rc = -EINVAL;
257 goto out; 322 goto out;
258 } 323 }
324 ecryptfs_init_mount_crypt_stat(mount_crypt_stat);
259 while ((p = strsep(&options, ",")) != NULL) { 325 while ((p = strsep(&options, ",")) != NULL) {
260 if (!*p) 326 if (!*p)
261 continue; 327 continue;
@@ -264,14 +330,13 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
264 case ecryptfs_opt_sig: 330 case ecryptfs_opt_sig:
265 case ecryptfs_opt_ecryptfs_sig: 331 case ecryptfs_opt_ecryptfs_sig:
266 sig_src = args[0].from; 332 sig_src = args[0].from;
267 sig_dst = 333 rc = ecryptfs_add_global_auth_tok(mount_crypt_stat,
268 mount_crypt_stat->global_auth_tok_sig; 334 sig_src);
269 memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX); 335 if (rc) {
270 sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0'; 336 printk(KERN_ERR "Error attempting to register "
271 ecryptfs_printk(KERN_DEBUG, 337 "global sig; rc = [%d]\n", rc);
272 "The mount_crypt_stat " 338 goto out;
273 "global_auth_tok_sig set to: " 339 }
274 "[%s]\n", sig_dst);
275 sig_set = 1; 340 sig_set = 1;
276 break; 341 break;
277 case ecryptfs_opt_debug: 342 case ecryptfs_opt_debug:
@@ -333,12 +398,10 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
333 p); 398 p);
334 } 399 }
335 } 400 }
336 /* Do not support lack of mount-wide signature in 0.1
337 * release */
338 if (!sig_set) { 401 if (!sig_set) {
339 rc = -EINVAL; 402 rc = -EINVAL;
340 ecryptfs_printk(KERN_ERR, "You must supply a valid " 403 ecryptfs_printk(KERN_ERR, "You must supply at least one valid "
341 "passphrase auth tok signature as a mount " 404 "auth tok signature as a mount "
342 "parameter; see the eCryptfs README\n"); 405 "parameter; see the eCryptfs README\n");
343 goto out; 406 goto out;
344 } 407 }
@@ -358,55 +421,23 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
358 if (!cipher_key_bytes_set) { 421 if (!cipher_key_bytes_set) {
359 mount_crypt_stat->global_default_cipher_key_size = 0; 422 mount_crypt_stat->global_default_cipher_key_size = 0;
360 } 423 }
361 rc = ecryptfs_process_cipher( 424 rc = ecryptfs_add_new_key_tfm(
362 &mount_crypt_stat->global_key_tfm, 425 NULL, mount_crypt_stat->global_default_cipher_name,
363 mount_crypt_stat->global_default_cipher_name, 426 mount_crypt_stat->global_default_cipher_key_size);
364 &mount_crypt_stat->global_default_cipher_key_size);
365 if (rc) { 427 if (rc) {
366 printk(KERN_ERR "Error attempting to initialize cipher [%s] " 428 printk(KERN_ERR "Error attempting to initialize cipher with "
367 "with key size [%Zd] bytes; rc = [%d]\n", 429 "name = [%s] and key size = [%td]; rc = [%d]\n",
368 mount_crypt_stat->global_default_cipher_name, 430 mount_crypt_stat->global_default_cipher_name,
369 mount_crypt_stat->global_default_cipher_key_size, rc); 431 mount_crypt_stat->global_default_cipher_key_size, rc);
370 mount_crypt_stat->global_key_tfm = NULL;
371 mount_crypt_stat->global_auth_tok_key = NULL;
372 rc = -EINVAL; 432 rc = -EINVAL;
373 goto out; 433 goto out;
374 } 434 }
375 mutex_init(&mount_crypt_stat->global_key_tfm_mutex); 435 rc = ecryptfs_init_global_auth_toks(mount_crypt_stat);
376 ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: " 436 if (rc) {
377 "[%s]\n", mount_crypt_stat->global_auth_tok_sig); 437 printk(KERN_WARNING "One or more global auth toks could not "
378 /* The reference to this key is held until umount is done The 438 "properly register; rc = [%d]\n", rc);
379 * call to key_put is done in ecryptfs_put_super() */
380 auth_tok_key = request_key(&key_type_user,
381 mount_crypt_stat->global_auth_tok_sig,
382 NULL);
383 if (!auth_tok_key || IS_ERR(auth_tok_key)) {
384 ecryptfs_printk(KERN_ERR, "Could not find key with "
385 "description: [%s]\n",
386 mount_crypt_stat->global_auth_tok_sig);
387 process_request_key_err(PTR_ERR(auth_tok_key));
388 rc = -EINVAL;
389 goto out;
390 }
391 auth_tok = ecryptfs_get_key_payload_data(auth_tok_key);
392 if (ecryptfs_verify_version(auth_tok->version)) {
393 ecryptfs_printk(KERN_ERR, "Data structure version mismatch. "
394 "Userspace tools must match eCryptfs kernel "
395 "module with major version [%d] and minor "
396 "version [%d]\n", ECRYPTFS_VERSION_MAJOR,
397 ECRYPTFS_VERSION_MINOR);
398 rc = -EINVAL;
399 goto out;
400 }
401 if (auth_tok->token_type != ECRYPTFS_PASSWORD
402 && auth_tok->token_type != ECRYPTFS_PRIVATE_KEY) {
403 ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
404 "returned from key query\n");
405 rc = -EINVAL;
406 goto out;
407 } 439 }
408 mount_crypt_stat->global_auth_tok_key = auth_tok_key; 440 rc = 0;
409 mount_crypt_stat->global_auth_tok = auth_tok;
410out: 441out:
411 return rc; 442 return rc;
412} 443}
@@ -495,7 +526,8 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
495 sb->s_maxbytes = lower_root->d_sb->s_maxbytes; 526 sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
496 ecryptfs_set_dentry_lower(sb->s_root, lower_root); 527 ecryptfs_set_dentry_lower(sb->s_root, lower_root);
497 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); 528 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
498 if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0))) 529 rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0);
530 if (rc)
499 goto out_free; 531 goto out_free;
500 rc = 0; 532 rc = 0;
501 goto out; 533 goto out;
@@ -639,15 +671,25 @@ static struct ecryptfs_cache_info {
639 .size = PAGE_CACHE_SIZE, 671 .size = PAGE_CACHE_SIZE,
640 }, 672 },
641 { 673 {
642 .cache = &ecryptfs_lower_page_cache,
643 .name = "ecryptfs_lower_page_cache",
644 .size = PAGE_CACHE_SIZE,
645 },
646 {
647 .cache = &ecryptfs_key_record_cache, 674 .cache = &ecryptfs_key_record_cache,
648 .name = "ecryptfs_key_record_cache", 675 .name = "ecryptfs_key_record_cache",
649 .size = sizeof(struct ecryptfs_key_record), 676 .size = sizeof(struct ecryptfs_key_record),
650 }, 677 },
678 {
679 .cache = &ecryptfs_key_sig_cache,
680 .name = "ecryptfs_key_sig_cache",
681 .size = sizeof(struct ecryptfs_key_sig),
682 },
683 {
684 .cache = &ecryptfs_global_auth_tok_cache,
685 .name = "ecryptfs_global_auth_tok_cache",
686 .size = sizeof(struct ecryptfs_global_auth_tok),
687 },
688 {
689 .cache = &ecryptfs_key_tfm_cache,
690 .name = "ecryptfs_key_tfm_cache",
691 .size = sizeof(struct ecryptfs_key_tfm),
692 },
651}; 693};
652 694
653static void ecryptfs_free_kmem_caches(void) 695static void ecryptfs_free_kmem_caches(void)
@@ -750,7 +792,8 @@ static struct ecryptfs_version_str_map_elem {
750 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, 792 {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
751 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, 793 {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
752 {ECRYPTFS_VERSIONING_POLICY, "policy"}, 794 {ECRYPTFS_VERSIONING_POLICY, "policy"},
753 {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"} 795 {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"},
796 {ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"}
754}; 797};
755 798
756static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) 799static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
@@ -786,7 +829,8 @@ static int do_sysfs_registration(void)
786{ 829{
787 int rc; 830 int rc;
788 831
789 if ((rc = subsystem_register(&ecryptfs_subsys))) { 832 rc = subsystem_register(&ecryptfs_subsys);
833 if (rc) {
790 printk(KERN_ERR 834 printk(KERN_ERR
791 "Unable to register ecryptfs sysfs subsystem\n"); 835 "Unable to register ecryptfs sysfs subsystem\n");
792 goto out; 836 goto out;
@@ -845,33 +889,49 @@ static int __init ecryptfs_init(void)
845 rc = register_filesystem(&ecryptfs_fs_type); 889 rc = register_filesystem(&ecryptfs_fs_type);
846 if (rc) { 890 if (rc) {
847 printk(KERN_ERR "Failed to register filesystem\n"); 891 printk(KERN_ERR "Failed to register filesystem\n");
848 ecryptfs_free_kmem_caches(); 892 goto out_free_kmem_caches;
849 goto out;
850 } 893 }
851 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); 894 kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
852 rc = do_sysfs_registration(); 895 rc = do_sysfs_registration();
853 if (rc) { 896 if (rc) {
854 printk(KERN_ERR "sysfs registration failed\n"); 897 printk(KERN_ERR "sysfs registration failed\n");
855 unregister_filesystem(&ecryptfs_fs_type); 898 goto out_unregister_filesystem;
856 ecryptfs_free_kmem_caches();
857 goto out;
858 } 899 }
859 rc = ecryptfs_init_messaging(ecryptfs_transport); 900 rc = ecryptfs_init_messaging(ecryptfs_transport);
860 if (rc) { 901 if (rc) {
861 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " 902 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
862 "initialize the eCryptfs netlink socket\n"); 903 "initialize the eCryptfs netlink socket\n");
863 do_sysfs_unregistration(); 904 goto out_do_sysfs_unregistration;
864 unregister_filesystem(&ecryptfs_fs_type); 905 }
865 ecryptfs_free_kmem_caches(); 906 rc = ecryptfs_init_crypto();
907 if (rc) {
908 printk(KERN_ERR "Failure whilst attempting to init crypto; "
909 "rc = [%d]\n", rc);
910 goto out_release_messaging;
866 } 911 }
912 goto out;
913out_release_messaging:
914 ecryptfs_release_messaging(ecryptfs_transport);
915out_do_sysfs_unregistration:
916 do_sysfs_unregistration();
917out_unregister_filesystem:
918 unregister_filesystem(&ecryptfs_fs_type);
919out_free_kmem_caches:
920 ecryptfs_free_kmem_caches();
867out: 921out:
868 return rc; 922 return rc;
869} 923}
870 924
871static void __exit ecryptfs_exit(void) 925static void __exit ecryptfs_exit(void)
872{ 926{
873 do_sysfs_unregistration(); 927 int rc;
928
929 rc = ecryptfs_destroy_crypto();
930 if (rc)
931 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
932 "rc = [%d]\n", rc);
874 ecryptfs_release_messaging(ecryptfs_transport); 933 ecryptfs_release_messaging(ecryptfs_transport);
934 do_sysfs_unregistration();
875 unregister_filesystem(&ecryptfs_fs_type); 935 unregister_filesystem(&ecryptfs_fs_type);
876 ecryptfs_free_kmem_caches(); 936 ecryptfs_free_kmem_caches();
877} 937}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index a9d87c47f72d..a96d341d154d 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -419,8 +419,9 @@ int ecryptfs_init_messaging(unsigned int transport)
419 } 419 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 420 mutex_init(&ecryptfs_daemon_id_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 421 mutex_lock(&ecryptfs_daemon_id_hash_mux);
422 ecryptfs_hash_buckets = 0; 422 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ++ecryptfs_hash_buckets); 423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++;
424 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head)
425 * ecryptfs_hash_buckets, GFP_KERNEL); 426 * ecryptfs_hash_buckets, GFP_KERNEL);
426 if (!ecryptfs_daemon_id_hash) { 427 if (!ecryptfs_daemon_id_hash) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index fd3f94d4a668..16a7a555f392 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -37,130 +37,27 @@
37struct kmem_cache *ecryptfs_lower_page_cache; 37struct kmem_cache *ecryptfs_lower_page_cache;
38 38
39/** 39/**
40 * ecryptfs_get1page 40 * ecryptfs_get_locked_page
41 * 41 *
42 * Get one page from cache or lower f/s, return error otherwise. 42 * Get one page from cache or lower f/s, return error otherwise.
43 * 43 *
44 * Returns unlocked and up-to-date page (if ok), with increased 44 * Returns locked and up-to-date page (if ok), with increased
45 * refcnt. 45 * refcnt.
46 */ 46 */
47static struct page *ecryptfs_get1page(struct file *file, int index) 47struct page *ecryptfs_get_locked_page(struct file *file, loff_t index)
48{ 48{
49 struct dentry *dentry; 49 struct dentry *dentry;
50 struct inode *inode; 50 struct inode *inode;
51 struct address_space *mapping; 51 struct address_space *mapping;
52 struct page *page;
52 53
53 dentry = file->f_path.dentry; 54 dentry = file->f_path.dentry;
54 inode = dentry->d_inode; 55 inode = dentry->d_inode;
55 mapping = inode->i_mapping; 56 mapping = inode->i_mapping;
56 return read_mapping_page(mapping, index, (void *)file); 57 page = read_mapping_page(mapping, index, (void *)file);
57} 58 if (!IS_ERR(page))
58 59 lock_page(page);
59/** 60 return page;
60 * ecryptfs_fill_zeros
61 * @file: The ecryptfs file
62 * @new_length: The new length of the data in the underlying file;
63 * everything between the prior end of the file and the
64 * new end of the file will be filled with zero's.
65 * new_length must be greater than current length
66 *
67 * Function for handling lseek-ing past the end of the file.
68 *
69 * This function does not support shrinking, only growing a file.
70 *
71 * Returns zero on success; non-zero otherwise.
72 */
73int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
74{
75 int rc = 0;
76 struct dentry *dentry = file->f_path.dentry;
77 struct inode *inode = dentry->d_inode;
78 pgoff_t old_end_page_index = 0;
79 pgoff_t index = old_end_page_index;
80 int old_end_pos_in_page = -1;
81 pgoff_t new_end_page_index;
82 int new_end_pos_in_page;
83 loff_t cur_length = i_size_read(inode);
84
85 if (cur_length != 0) {
86 index = old_end_page_index =
87 ((cur_length - 1) >> PAGE_CACHE_SHIFT);
88 old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK);
89 }
90 new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
91 new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
92 ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; "
93 "old_end_pos_in_page = [%d]; "
94 "new_end_page_index = [0x%.16x]; "
95 "new_end_pos_in_page = [%d]\n",
96 old_end_page_index, old_end_pos_in_page,
97 new_end_page_index, new_end_pos_in_page);
98 if (old_end_page_index == new_end_page_index) {
99 /* Start and end are in the same page; we just need to
100 * set a portion of the existing page to zero's */
101 rc = ecryptfs_write_zeros(file, index,
102 (old_end_pos_in_page + 1),
103 (new_end_pos_in_page
104 - old_end_pos_in_page));
105 if (rc)
106 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
107 "file=[%p], "
108 "index=[0x%.16x], "
109 "old_end_pos_in_page=[d], "
110 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
111 "=[%d]"
112 ")=[d]) returned [%d]\n", file, index,
113 old_end_pos_in_page,
114 new_end_pos_in_page,
115 (PAGE_CACHE_SIZE - new_end_pos_in_page),
116 rc);
117 goto out;
118 }
119 /* Fill the remainder of the previous last page with zeros */
120 rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1),
121 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
122 if (rc) {
123 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], "
124 "index=[0x%.16x], old_end_pos_in_page=[d], "
125 "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
126 "returned [%d]\n", file, index,
127 old_end_pos_in_page,
128 (PAGE_CACHE_SIZE - old_end_pos_in_page), rc);
129 goto out;
130 }
131 index++;
132 while (index < new_end_page_index) {
133 /* Fill all intermediate pages with zeros */
134 rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE);
135 if (rc) {
136 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
137 "file=[%p], "
138 "index=[0x%.16x], "
139 "old_end_pos_in_page=[d], "
140 "(PAGE_CACHE_SIZE - new_end_pos_in_page"
141 "=[%d]"
142 ")=[d]) returned [%d]\n", file, index,
143 old_end_pos_in_page,
144 new_end_pos_in_page,
145 (PAGE_CACHE_SIZE - new_end_pos_in_page),
146 rc);
147 goto out;
148 }
149 index++;
150 }
151 /* Fill the portion at the beginning of the last new page with
152 * zero's */
153 rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1));
154 if (rc) {
155 ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file="
156 "[%p], index=[0x%.16x], 0, "
157 "new_end_pos_in_page=[%d]"
158 "returned [%d]\n", file, index,
159 new_end_pos_in_page, rc);
160 goto out;
161 }
162out:
163 return rc;
164} 61}
165 62
166/** 63/**
@@ -171,13 +68,9 @@ out:
171 */ 68 */
172static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) 69static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
173{ 70{
174 struct ecryptfs_page_crypt_context ctx;
175 int rc; 71 int rc;
176 72
177 ctx.page = page; 73 rc = ecryptfs_encrypt_page(page);
178 ctx.mode = ECRYPTFS_WRITEPAGE_MODE;
179 ctx.param.wbc = wbc;
180 rc = ecryptfs_encrypt_page(&ctx);
181 if (rc) { 74 if (rc) {
182 ecryptfs_printk(KERN_WARNING, "Error encrypting " 75 ecryptfs_printk(KERN_WARNING, "Error encrypting "
183 "page (upper index [0x%.16x])\n", page->index); 76 "page (upper index [0x%.16x])\n", page->index);
@@ -191,58 +84,6 @@ out:
191} 84}
192 85
193/** 86/**
194 * Reads the data from the lower file file at index lower_page_index
195 * and copies that data into page.
196 *
197 * @param page Page to fill
198 * @param lower_page_index Index of the page in the lower file to get
199 */
200int ecryptfs_do_readpage(struct file *file, struct page *page,
201 pgoff_t lower_page_index)
202{
203 int rc;
204 struct dentry *dentry;
205 struct file *lower_file;
206 struct dentry *lower_dentry;
207 struct inode *inode;
208 struct inode *lower_inode;
209 char *page_data;
210 struct page *lower_page = NULL;
211 char *lower_page_data;
212 const struct address_space_operations *lower_a_ops;
213
214 dentry = file->f_path.dentry;
215 lower_file = ecryptfs_file_to_lower(file);
216 lower_dentry = ecryptfs_dentry_to_lower(dentry);
217 inode = dentry->d_inode;
218 lower_inode = ecryptfs_inode_to_lower(inode);
219 lower_a_ops = lower_inode->i_mapping->a_ops;
220 lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index,
221 (filler_t *)lower_a_ops->readpage,
222 (void *)lower_file);
223 if (IS_ERR(lower_page)) {
224 rc = PTR_ERR(lower_page);
225 lower_page = NULL;
226 ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
227 goto out;
228 }
229 page_data = kmap_atomic(page, KM_USER0);
230 lower_page_data = kmap_atomic(lower_page, KM_USER1);
231 memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
232 kunmap_atomic(lower_page_data, KM_USER1);
233 kunmap_atomic(page_data, KM_USER0);
234 flush_dcache_page(page);
235 rc = 0;
236out:
237 if (likely(lower_page))
238 page_cache_release(lower_page);
239 if (rc == 0)
240 SetPageUptodate(page);
241 else
242 ClearPageUptodate(page);
243 return rc;
244}
245/**
246 * Header Extent: 87 * Header Extent:
247 * Octets 0-7: Unencrypted file size (big-endian) 88 * Octets 0-7: Unencrypted file size (big-endian)
248 * Octets 8-15: eCryptfs special marker 89 * Octets 8-15: eCryptfs special marker
@@ -271,9 +112,77 @@ static void set_header_info(char *page_virt,
271} 112}
272 113
273/** 114/**
115 * ecryptfs_copy_up_encrypted_with_header
116 * @page: Sort of a ``virtual'' representation of the encrypted lower
117 * file. The actual lower file does not have the metadata in
118 * the header. This is locked.
119 * @crypt_stat: The eCryptfs inode's cryptographic context
120 *
121 * The ``view'' is the version of the file that userspace winds up
122 * seeing, with the header information inserted.
123 */
124static int
125ecryptfs_copy_up_encrypted_with_header(struct page *page,
126 struct ecryptfs_crypt_stat *crypt_stat)
127{
128 loff_t extent_num_in_page = 0;
129 loff_t num_extents_per_page = (PAGE_CACHE_SIZE
130 / crypt_stat->extent_size);
131 int rc = 0;
132
133 while (extent_num_in_page < num_extents_per_page) {
134 loff_t view_extent_num = ((((loff_t)page->index)
135 * num_extents_per_page)
136 + extent_num_in_page);
137
138 if (view_extent_num < crypt_stat->num_header_extents_at_front) {
139 /* This is a header extent */
140 char *page_virt;
141
142 page_virt = kmap_atomic(page, KM_USER0);
143 memset(page_virt, 0, PAGE_CACHE_SIZE);
144 /* TODO: Support more than one header extent */
145 if (view_extent_num == 0) {
146 rc = ecryptfs_read_xattr_region(
147 page_virt, page->mapping->host);
148 set_header_info(page_virt, crypt_stat);
149 }
150 kunmap_atomic(page_virt, KM_USER0);
151 flush_dcache_page(page);
152 if (rc) {
153 printk(KERN_ERR "%s: Error reading xattr "
154 "region; rc = [%d]\n", __FUNCTION__, rc);
155 goto out;
156 }
157 } else {
158 /* This is an encrypted data extent */
159 loff_t lower_offset =
160 ((view_extent_num -
161 crypt_stat->num_header_extents_at_front)
162 * crypt_stat->extent_size);
163
164 rc = ecryptfs_read_lower_page_segment(
165 page, (lower_offset >> PAGE_CACHE_SHIFT),
166 (lower_offset & ~PAGE_CACHE_MASK),
167 crypt_stat->extent_size, page->mapping->host);
168 if (rc) {
169 printk(KERN_ERR "%s: Error attempting to read "
170 "extent at offset [%lld] in the lower "
171 "file; rc = [%d]\n", __FUNCTION__,
172 lower_offset, rc);
173 goto out;
174 }
175 }
176 extent_num_in_page++;
177 }
178out:
179 return rc;
180}
181
182/**
274 * ecryptfs_readpage 183 * ecryptfs_readpage
275 * @file: This is an ecryptfs file 184 * @file: An eCryptfs file
276 * @page: ecryptfs associated page to stick the read data into 185 * @page: Page from eCryptfs inode mapping into which to stick the read data
277 * 186 *
278 * Read in a page, decrypting if necessary. 187 * Read in a page, decrypting if necessary.
279 * 188 *
@@ -281,59 +190,35 @@ static void set_header_info(char *page_virt,
281 */ 190 */
282static int ecryptfs_readpage(struct file *file, struct page *page) 191static int ecryptfs_readpage(struct file *file, struct page *page)
283{ 192{
193 struct ecryptfs_crypt_stat *crypt_stat =
194 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat;
284 int rc = 0; 195 int rc = 0;
285 struct ecryptfs_crypt_stat *crypt_stat;
286 196
287 BUG_ON(!(file && file->f_path.dentry && file->f_path.dentry->d_inode));
288 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
289 ->crypt_stat;
290 if (!crypt_stat 197 if (!crypt_stat
291 || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED) 198 || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
292 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { 199 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
293 ecryptfs_printk(KERN_DEBUG, 200 ecryptfs_printk(KERN_DEBUG,
294 "Passing through unencrypted page\n"); 201 "Passing through unencrypted page\n");
295 rc = ecryptfs_do_readpage(file, page, page->index); 202 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
296 if (rc) { 203 PAGE_CACHE_SIZE,
297 ecryptfs_printk(KERN_ERR, "Error reading page; rc = " 204 page->mapping->host);
298 "[%d]\n", rc);
299 goto out;
300 }
301 } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) { 205 } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
302 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { 206 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
303 int num_pages_in_header_region = 207 rc = ecryptfs_copy_up_encrypted_with_header(page,
304 (crypt_stat->header_extent_size 208 crypt_stat);
305 / PAGE_CACHE_SIZE); 209 if (rc) {
306 210 printk(KERN_ERR "%s: Error attempting to copy "
307 if (page->index < num_pages_in_header_region) { 211 "the encrypted content from the lower "
308 char *page_virt; 212 "file whilst inserting the metadata "
309 213 "from the xattr into the header; rc = "
310 page_virt = kmap_atomic(page, KM_USER0); 214 "[%d]\n", __FUNCTION__, rc);
311 memset(page_virt, 0, PAGE_CACHE_SIZE); 215 goto out;
312 if (page->index == 0) {
313 rc = ecryptfs_read_xattr_region(
314 page_virt, file->f_path.dentry);
315 set_header_info(page_virt, crypt_stat);
316 }
317 kunmap_atomic(page_virt, KM_USER0);
318 flush_dcache_page(page);
319 if (rc) {
320 printk(KERN_ERR "Error reading xattr "
321 "region\n");
322 goto out;
323 }
324 } else {
325 rc = ecryptfs_do_readpage(
326 file, page,
327 (page->index
328 - num_pages_in_header_region));
329 if (rc) {
330 printk(KERN_ERR "Error reading page; "
331 "rc = [%d]\n", rc);
332 goto out;
333 }
334 } 216 }
217
335 } else { 218 } else {
336 rc = ecryptfs_do_readpage(file, page, page->index); 219 rc = ecryptfs_read_lower_page_segment(
220 page, page->index, 0, PAGE_CACHE_SIZE,
221 page->mapping->host);
337 if (rc) { 222 if (rc) {
338 printk(KERN_ERR "Error reading page; rc = " 223 printk(KERN_ERR "Error reading page; rc = "
339 "[%d]\n", rc); 224 "[%d]\n", rc);
@@ -341,17 +226,18 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
341 } 226 }
342 } 227 }
343 } else { 228 } else {
344 rc = ecryptfs_decrypt_page(file, page); 229 rc = ecryptfs_decrypt_page(page);
345 if (rc) { 230 if (rc) {
346 ecryptfs_printk(KERN_ERR, "Error decrypting page; " 231 ecryptfs_printk(KERN_ERR, "Error decrypting page; "
347 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
348 goto out; 233 goto out;
349 } 234 }
350 } 235 }
351 SetPageUptodate(page);
352out: 236out:
353 if (rc) 237 if (rc)
354 ClearPageUptodate(page); 238 ClearPageUptodate(page);
239 else
240 SetPageUptodate(page);
355 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", 241 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
356 page->index); 242 page->index);
357 unlock_page(page); 243 unlock_page(page);
@@ -377,27 +263,6 @@ out:
377 return 0; 263 return 0;
378} 264}
379 265
380/**
381 * eCryptfs does not currently support holes. When writing after a
382 * seek past the end of the file, eCryptfs fills in 0's through to the
383 * current location. The code to fill in the 0's to all the
384 * intermediate pages calls ecryptfs_prepare_write_no_truncate().
385 */
386static int
387ecryptfs_prepare_write_no_truncate(struct file *file, struct page *page,
388 unsigned from, unsigned to)
389{
390 int rc = 0;
391
392 if (from == 0 && to == PAGE_CACHE_SIZE)
393 goto out; /* If we are writing a full page, it will be
394 up to date. */
395 if (!PageUptodate(page))
396 rc = ecryptfs_do_readpage(file, page, page->index);
397out:
398 return rc;
399}
400
401static int ecryptfs_prepare_write(struct file *file, struct page *page, 266static int ecryptfs_prepare_write(struct file *file, struct page *page,
402 unsigned from, unsigned to) 267 unsigned from, unsigned to)
403{ 268{
@@ -406,10 +271,21 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
406 if (from == 0 && to == PAGE_CACHE_SIZE) 271 if (from == 0 && to == PAGE_CACHE_SIZE)
407 goto out; /* If we are writing a full page, it will be 272 goto out; /* If we are writing a full page, it will be
408 up to date. */ 273 up to date. */
409 if (!PageUptodate(page)) 274 if (!PageUptodate(page)) {
410 rc = ecryptfs_do_readpage(file, page, page->index); 275 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
276 PAGE_CACHE_SIZE,
277 page->mapping->host);
278 if (rc) {
279 printk(KERN_ERR "%s: Error attemping to read lower "
280 "page segment; rc = [%d]\n", __FUNCTION__, rc);
281 ClearPageUptodate(page);
282 goto out;
283 } else
284 SetPageUptodate(page);
285 }
411 if (page->index != 0) { 286 if (page->index != 0) {
412 loff_t end_of_prev_pg_pos = page_offset(page) - 1; 287 loff_t end_of_prev_pg_pos =
288 (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
413 289
414 if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) { 290 if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
415 rc = ecryptfs_truncate(file->f_path.dentry, 291 rc = ecryptfs_truncate(file->f_path.dentry,
@@ -428,32 +304,6 @@ out:
428 return rc; 304 return rc;
429} 305}
430 306
431int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
432 struct inode *lower_inode,
433 struct writeback_control *wbc)
434{
435 int rc = 0;
436
437 rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
438 if (rc) {
439 ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
440 "rc = [%d]\n", rc);
441 goto out;
442 }
443 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
444 page_cache_release(lower_page);
445out:
446 return rc;
447}
448
449static
450void ecryptfs_release_lower_page(struct page *lower_page, int page_locked)
451{
452 if (page_locked)
453 unlock_page(lower_page);
454 page_cache_release(lower_page);
455}
456
457/** 307/**
458 * ecryptfs_write_inode_size_to_header 308 * ecryptfs_write_inode_size_to_header
459 * 309 *
@@ -461,67 +311,48 @@ void ecryptfs_release_lower_page(struct page *lower_page, int page_locked)
461 * 311 *
462 * Returns zero on success; non-zero on error. 312 * Returns zero on success; non-zero on error.
463 */ 313 */
464static int ecryptfs_write_inode_size_to_header(struct file *lower_file, 314static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
465 struct inode *lower_inode,
466 struct inode *inode)
467{ 315{
468 int rc = 0;
469 struct page *header_page;
470 char *header_virt;
471 const struct address_space_operations *lower_a_ops;
472 u64 file_size; 316 u64 file_size;
317 char *file_size_virt;
318 int rc;
473 319
474retry: 320 file_size_virt = kmalloc(sizeof(u64), GFP_KERNEL);
475 header_page = grab_cache_page(lower_inode->i_mapping, 0); 321 if (!file_size_virt) {
476 if (!header_page) { 322 rc = -ENOMEM;
477 ecryptfs_printk(KERN_ERR, "grab_cache_page for "
478 "lower_page_index 0 failed\n");
479 rc = -EINVAL;
480 goto out;
481 }
482 lower_a_ops = lower_inode->i_mapping->a_ops;
483 rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
484 if (rc) {
485 if (rc == AOP_TRUNCATED_PAGE) {
486 ecryptfs_release_lower_page(header_page, 0);
487 goto retry;
488 } else
489 ecryptfs_release_lower_page(header_page, 1);
490 goto out; 323 goto out;
491 } 324 }
492 file_size = (u64)i_size_read(inode); 325 file_size = (u64)i_size_read(ecryptfs_inode);
493 ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
494 file_size = cpu_to_be64(file_size); 326 file_size = cpu_to_be64(file_size);
495 header_virt = kmap_atomic(header_page, KM_USER0); 327 memcpy(file_size_virt, &file_size, sizeof(u64));
496 memcpy(header_virt, &file_size, sizeof(u64)); 328 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
497 kunmap_atomic(header_virt, KM_USER0); 329 sizeof(u64));
498 flush_dcache_page(header_page); 330 kfree(file_size_virt);
499 rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); 331 if (rc)
500 if (rc < 0) 332 printk(KERN_ERR "%s: Error writing file size to header; "
501 ecryptfs_printk(KERN_ERR, "Error commiting header page " 333 "rc = [%d]\n", __FUNCTION__, rc);
502 "write\n");
503 if (rc == AOP_TRUNCATED_PAGE) {
504 ecryptfs_release_lower_page(header_page, 0);
505 goto retry;
506 } else
507 ecryptfs_release_lower_page(header_page, 1);
508 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
509 mark_inode_dirty_sync(inode);
510out: 334out:
511 return rc; 335 return rc;
512} 336}
513 337
514static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode, 338struct kmem_cache *ecryptfs_xattr_cache;
515 struct inode *inode, 339
516 struct dentry *ecryptfs_dentry, 340static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
517 int lower_i_mutex_held)
518{ 341{
519 ssize_t size; 342 ssize_t size;
520 void *xattr_virt; 343 void *xattr_virt;
521 struct dentry *lower_dentry; 344 struct dentry *lower_dentry =
345 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
346 struct inode *lower_inode = lower_dentry->d_inode;
522 u64 file_size; 347 u64 file_size;
523 int rc; 348 int rc;
524 349
350 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
351 printk(KERN_WARNING
352 "No support for setting xattr in lower filesystem\n");
353 rc = -ENOSYS;
354 goto out;
355 }
525 xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL); 356 xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
526 if (!xattr_virt) { 357 if (!xattr_virt) {
527 printk(KERN_ERR "Out of memory whilst attempting to write " 358 printk(KERN_ERR "Out of memory whilst attempting to write "
@@ -529,35 +360,17 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode,
529 rc = -ENOMEM; 360 rc = -ENOMEM;
530 goto out; 361 goto out;
531 } 362 }
532 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 363 mutex_lock(&lower_inode->i_mutex);
533 if (!lower_dentry->d_inode->i_op->getxattr || 364 size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
534 !lower_dentry->d_inode->i_op->setxattr) { 365 xattr_virt, PAGE_CACHE_SIZE);
535 printk(KERN_WARNING
536 "No support for setting xattr in lower filesystem\n");
537 rc = -ENOSYS;
538 kmem_cache_free(ecryptfs_xattr_cache, xattr_virt);
539 goto out;
540 }
541 if (!lower_i_mutex_held)
542 mutex_lock(&lower_dentry->d_inode->i_mutex);
543 size = lower_dentry->d_inode->i_op->getxattr(lower_dentry,
544 ECRYPTFS_XATTR_NAME,
545 xattr_virt,
546 PAGE_CACHE_SIZE);
547 if (!lower_i_mutex_held)
548 mutex_unlock(&lower_dentry->d_inode->i_mutex);
549 if (size < 0) 366 if (size < 0)
550 size = 8; 367 size = 8;
551 file_size = (u64)i_size_read(inode); 368 file_size = (u64)i_size_read(ecryptfs_inode);
552 file_size = cpu_to_be64(file_size); 369 file_size = cpu_to_be64(file_size);
553 memcpy(xattr_virt, &file_size, sizeof(u64)); 370 memcpy(xattr_virt, &file_size, sizeof(u64));
554 if (!lower_i_mutex_held) 371 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
555 mutex_lock(&lower_dentry->d_inode->i_mutex); 372 xattr_virt, size, 0);
556 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, 373 mutex_unlock(&lower_inode->i_mutex);
557 ECRYPTFS_XATTR_NAME,
558 xattr_virt, size, 0);
559 if (!lower_i_mutex_held)
560 mutex_unlock(&lower_dentry->d_inode->i_mutex);
561 if (rc) 374 if (rc)
562 printk(KERN_ERR "Error whilst attempting to write inode size " 375 printk(KERN_ERR "Error whilst attempting to write inode size "
563 "to lower file xattr; rc = [%d]\n", rc); 376 "to lower file xattr; rc = [%d]\n", rc);
@@ -566,122 +379,18 @@ out:
566 return rc; 379 return rc;
567} 380}
568 381
569int 382int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
570ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
571 struct inode *lower_inode,
572 struct inode *inode,
573 struct dentry *ecryptfs_dentry,
574 int lower_i_mutex_held)
575{ 383{
576 struct ecryptfs_crypt_stat *crypt_stat; 384 struct ecryptfs_crypt_stat *crypt_stat;
577 385
578 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; 386 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
579 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 387 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
580 return ecryptfs_write_inode_size_to_xattr(lower_inode, inode, 388 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
581 ecryptfs_dentry,
582 lower_i_mutex_held);
583 else 389 else
584 return ecryptfs_write_inode_size_to_header(lower_file, 390 return ecryptfs_write_inode_size_to_header(ecryptfs_inode);
585 lower_inode,
586 inode);
587}
588
589int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
590 struct file *lower_file,
591 unsigned long lower_page_index, int byte_offset,
592 int region_bytes)
593{
594 int rc = 0;
595
596retry:
597 *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index);
598 if (!(*lower_page)) {
599 rc = -EINVAL;
600 ecryptfs_printk(KERN_ERR, "Error attempting to grab "
601 "lower page with index [0x%.16x]\n",
602 lower_page_index);
603 goto out;
604 }
605 rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
606 (*lower_page),
607 byte_offset,
608 region_bytes);
609 if (rc) {
610 if (rc == AOP_TRUNCATED_PAGE) {
611 ecryptfs_release_lower_page(*lower_page, 0);
612 goto retry;
613 } else {
614 ecryptfs_printk(KERN_ERR, "prepare_write for "
615 "lower_page_index = [0x%.16x] failed; rc = "
616 "[%d]\n", lower_page_index, rc);
617 ecryptfs_release_lower_page(*lower_page, 1);
618 (*lower_page) = NULL;
619 }
620 }
621out:
622 return rc;
623}
624
625/**
626 * ecryptfs_commit_lower_page
627 *
628 * Returns zero on success; non-zero on error
629 */
630int
631ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
632 struct file *lower_file, int byte_offset,
633 int region_size)
634{
635 int page_locked = 1;
636 int rc = 0;
637
638 rc = lower_inode->i_mapping->a_ops->commit_write(
639 lower_file, lower_page, byte_offset, region_size);
640 if (rc == AOP_TRUNCATED_PAGE)
641 page_locked = 0;
642 if (rc < 0) {
643 ecryptfs_printk(KERN_ERR,
644 "Error committing write; rc = [%d]\n", rc);
645 } else
646 rc = 0;
647 ecryptfs_release_lower_page(lower_page, page_locked);
648 return rc;
649} 391}
650 392
651/** 393/**
652 * ecryptfs_copy_page_to_lower
653 *
654 * Used for plaintext pass-through; no page index interpolation
655 * required.
656 */
657int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
658 struct file *lower_file)
659{
660 int rc = 0;
661 struct page *lower_page;
662
663 rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
664 page->index, 0, PAGE_CACHE_SIZE);
665 if (rc) {
666 ecryptfs_printk(KERN_ERR, "Error attempting to get page "
667 "at index [0x%.16x]\n", page->index);
668 goto out;
669 }
670 /* TODO: aops */
671 memcpy((char *)page_address(lower_page), page_address(page),
672 PAGE_CACHE_SIZE);
673 rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
674 0, PAGE_CACHE_SIZE);
675 if (rc)
676 ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
677 "at index [0x%.16x]\n", page->index);
678out:
679 return rc;
680}
681
682struct kmem_cache *ecryptfs_xattr_cache;
683
684/**
685 * ecryptfs_commit_write 394 * ecryptfs_commit_write
686 * @file: The eCryptfs file object 395 * @file: The eCryptfs file object
687 * @page: The eCryptfs page 396 * @page: The eCryptfs page
@@ -695,20 +404,12 @@ struct kmem_cache *ecryptfs_xattr_cache;
695static int ecryptfs_commit_write(struct file *file, struct page *page, 404static int ecryptfs_commit_write(struct file *file, struct page *page,
696 unsigned from, unsigned to) 405 unsigned from, unsigned to)
697{ 406{
698 struct ecryptfs_page_crypt_context ctx;
699 loff_t pos; 407 loff_t pos;
700 struct inode *inode; 408 struct inode *ecryptfs_inode = page->mapping->host;
701 struct inode *lower_inode; 409 struct ecryptfs_crypt_stat *crypt_stat =
702 struct file *lower_file; 410 &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat;
703 struct ecryptfs_crypt_stat *crypt_stat;
704 int rc; 411 int rc;
705 412
706 inode = page->mapping->host;
707 lower_inode = ecryptfs_inode_to_lower(inode);
708 lower_file = ecryptfs_file_to_lower(file);
709 mutex_lock(&lower_inode->i_mutex);
710 crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
711 ->crypt_stat;
712 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { 413 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
713 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in " 414 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
714 "crypt_stat at memory location [%p]\n", crypt_stat); 415 "crypt_stat at memory location [%p]\n", crypt_stat);
@@ -718,6 +419,7 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
718 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 419 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
719 "(page w/ index = [0x%.16x], to = [%d])\n", page->index, 420 "(page w/ index = [0x%.16x], to = [%d])\n", page->index,
720 to); 421 to);
422 /* Fills in zeros if 'to' goes beyond inode size */
721 rc = fill_zeros_to_end_of_page(page, to); 423 rc = fill_zeros_to_end_of_page(page, to);
722 if (rc) { 424 if (rc) {
723 ecryptfs_printk(KERN_WARNING, "Error attempting to fill " 425 ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
@@ -725,82 +427,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
725 page->index); 427 page->index);
726 goto out; 428 goto out;
727 } 429 }
728 ctx.page = page; 430 rc = ecryptfs_encrypt_page(page);
729 ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
730 ctx.param.lower_file = lower_file;
731 rc = ecryptfs_encrypt_page(&ctx);
732 if (rc) { 431 if (rc) {
733 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " 432 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
734 "index [0x%.16x])\n", page->index); 433 "index [0x%.16x])\n", page->index);
735 goto out; 434 goto out;
736 } 435 }
737 inode->i_blocks = lower_inode->i_blocks; 436 pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to;
738 pos = page_offset(page) + to; 437 if (pos > i_size_read(ecryptfs_inode)) {
739 if (pos > i_size_read(inode)) { 438 i_size_write(ecryptfs_inode, pos);
740 i_size_write(inode, pos);
741 ecryptfs_printk(KERN_DEBUG, "Expanded file size to " 439 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
742 "[0x%.16x]\n", i_size_read(inode)); 440 "[0x%.16x]\n", i_size_read(ecryptfs_inode));
743 } 441 }
744 rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, 442 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
745 inode, file->f_dentry,
746 ECRYPTFS_LOWER_I_MUTEX_HELD);
747 if (rc) 443 if (rc)
748 printk(KERN_ERR "Error writing inode size to metadata; " 444 printk(KERN_ERR "Error writing inode size to metadata; "
749 "rc = [%d]\n", rc); 445 "rc = [%d]\n", rc);
750 lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
751 mark_inode_dirty_sync(inode);
752out:
753 if (rc < 0)
754 ClearPageUptodate(page);
755 else
756 SetPageUptodate(page);
757 mutex_unlock(&lower_inode->i_mutex);
758 return rc;
759}
760
761/**
762 * ecryptfs_write_zeros
763 * @file: The ecryptfs file
764 * @index: The index in which we are writing
765 * @start: The position after the last block of data
766 * @num_zeros: The number of zeros to write
767 *
768 * Write a specified number of zero's to a page.
769 *
770 * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
771 */
772int
773ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
774{
775 int rc = 0;
776 struct page *tmp_page;
777
778 tmp_page = ecryptfs_get1page(file, index);
779 if (IS_ERR(tmp_page)) {
780 ecryptfs_printk(KERN_ERR, "Error getting page at index "
781 "[0x%.16x]\n", index);
782 rc = PTR_ERR(tmp_page);
783 goto out;
784 }
785 if ((rc = ecryptfs_prepare_write_no_truncate(file, tmp_page, start,
786 (start + num_zeros)))) {
787 ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
788 "to page at index [0x%.16x]\n",
789 index);
790 page_cache_release(tmp_page);
791 goto out;
792 }
793 zero_user_page(tmp_page, start, num_zeros, KM_USER0);
794 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
795 if (rc < 0) {
796 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
797 "to remainder of page at index [0x%.16x]\n",
798 index);
799 page_cache_release(tmp_page);
800 goto out;
801 }
802 rc = 0;
803 page_cache_release(tmp_page);
804out: 446out:
805 return rc; 447 return rc;
806} 448}
@@ -819,34 +461,10 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
819 return rc; 461 return rc;
820} 462}
821 463
822static void ecryptfs_sync_page(struct page *page)
823{
824 struct inode *inode;
825 struct inode *lower_inode;
826 struct page *lower_page;
827
828 inode = page->mapping->host;
829 lower_inode = ecryptfs_inode_to_lower(inode);
830 /* NOTE: Recently swapped with grab_cache_page(), since
831 * sync_page() just makes sure that pending I/O gets done. */
832 lower_page = find_lock_page(lower_inode->i_mapping, page->index);
833 if (!lower_page) {
834 ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
835 return;
836 }
837 if (lower_page->mapping->a_ops->sync_page)
838 lower_page->mapping->a_ops->sync_page(lower_page);
839 ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
840 lower_page->index);
841 unlock_page(lower_page);
842 page_cache_release(lower_page);
843}
844
845struct address_space_operations ecryptfs_aops = { 464struct address_space_operations ecryptfs_aops = {
846 .writepage = ecryptfs_writepage, 465 .writepage = ecryptfs_writepage,
847 .readpage = ecryptfs_readpage, 466 .readpage = ecryptfs_readpage,
848 .prepare_write = ecryptfs_prepare_write, 467 .prepare_write = ecryptfs_prepare_write,
849 .commit_write = ecryptfs_commit_write, 468 .commit_write = ecryptfs_commit_write,
850 .bmap = ecryptfs_bmap, 469 .bmap = ecryptfs_bmap,
851 .sync_page = ecryptfs_sync_page,
852}; 470};
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
new file mode 100644
index 000000000000..2150edf9a58e
--- /dev/null
+++ b/fs/ecryptfs/read_write.c
@@ -0,0 +1,358 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2007 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include "ecryptfs_kernel.h"
26
27/**
28 * ecryptfs_write_lower
29 * @ecryptfs_inode: The eCryptfs inode
30 * @data: Data to write
31 * @offset: Byte offset in the lower file to which to write the data
32 * @size: Number of bytes from @data to write at @offset in the lower
33 * file
34 *
35 * Write data to the lower file.
36 *
37 * Returns zero on success; non-zero on error
38 */
39int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
40 loff_t offset, size_t size)
41{
42 struct ecryptfs_inode_info *inode_info;
43 ssize_t octets_written;
44 mm_segment_t fs_save;
45 int rc = 0;
46
47 inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
48 mutex_lock(&inode_info->lower_file_mutex);
49 BUG_ON(!inode_info->lower_file);
50 inode_info->lower_file->f_pos = offset;
51 fs_save = get_fs();
52 set_fs(get_ds());
53 octets_written = vfs_write(inode_info->lower_file, data, size,
54 &inode_info->lower_file->f_pos);
55 set_fs(fs_save);
56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size);
59 rc = -EINVAL;
60 }
61 mutex_unlock(&inode_info->lower_file_mutex);
62 mark_inode_dirty_sync(ecryptfs_inode);
63 return rc;
64}
65
66/**
67 * ecryptfs_write_lower_page_segment
68 * @ecryptfs_inode: The eCryptfs inode
69 * @page_for_lower: The page containing the data to be written to the
70 * lower file
71 * @offset_in_page: The offset in the @page_for_lower from which to
72 * start writing the data
73 * @size: The amount of data from @page_for_lower to write to the
74 * lower file
75 *
76 * Determines the byte offset in the file for the given page and
77 * offset within the page, maps the page, and makes the call to write
78 * the contents of @page_for_lower to the lower inode.
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
83 struct page *page_for_lower,
84 size_t offset_in_page, size_t size)
85{
86 char *virt;
87 loff_t offset;
88 int rc;
89
90 offset = ((((off_t)page_for_lower->index) << PAGE_CACHE_SHIFT)
91 + offset_in_page);
92 virt = kmap(page_for_lower);
93 rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
94 kunmap(page_for_lower);
95 return rc;
96}
97
98/**
99 * ecryptfs_write
100 * @ecryptfs_file: The eCryptfs file into which to write
101 * @data: Virtual address where data to write is located
102 * @offset: Offset in the eCryptfs file at which to begin writing the
103 * data from @data
104 * @size: The number of bytes to write from @data
105 *
106 * Write an arbitrary amount of data to an arbitrary location in the
107 * eCryptfs inode page cache. This is done on a page-by-page, and then
108 * by an extent-by-extent, basis; individual extents are encrypted and
109 * written to the lower page cache (via VFS writes). This function
110 * takes care of all the address translation to locations in the lower
111 * filesystem; it also handles truncate events, writing out zeros
112 * where necessary.
113 *
114 * Returns zero on success; non-zero otherwise
115 */
116int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
117 size_t size)
118{
119 struct page *ecryptfs_page;
120 char *ecryptfs_page_virt;
121 loff_t ecryptfs_file_size =
122 i_size_read(ecryptfs_file->f_dentry->d_inode);
123 loff_t data_offset = 0;
124 loff_t pos;
125 int rc = 0;
126
127 if (offset > ecryptfs_file_size)
128 pos = ecryptfs_file_size;
129 else
130 pos = offset;
131 while (pos < (offset + size)) {
132 pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
133 size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
134 size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
135 size_t total_remaining_bytes = ((offset + size) - pos);
136
137 if (num_bytes > total_remaining_bytes)
138 num_bytes = total_remaining_bytes;
139 if (pos < offset) {
140 size_t total_remaining_zeros = (offset - pos);
141
142 if (num_bytes > total_remaining_zeros)
143 num_bytes = total_remaining_zeros;
144 }
145 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file,
146 ecryptfs_page_idx);
147 if (IS_ERR(ecryptfs_page)) {
148 rc = PTR_ERR(ecryptfs_page);
149 printk(KERN_ERR "%s: Error getting page at "
150 "index [%ld] from eCryptfs inode "
151 "mapping; rc = [%d]\n", __FUNCTION__,
152 ecryptfs_page_idx, rc);
153 goto out;
154 }
155 if (start_offset_in_page) {
156 /* Read in the page from the lower
157 * into the eCryptfs inode page cache,
158 * decrypting */
159 rc = ecryptfs_decrypt_page(ecryptfs_page);
160 if (rc) {
161 printk(KERN_ERR "%s: Error decrypting "
162 "page; rc = [%d]\n",
163 __FUNCTION__, rc);
164 ClearPageUptodate(ecryptfs_page);
165 page_cache_release(ecryptfs_page);
166 goto out;
167 }
168 }
169 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
170 if (pos >= offset) {
171 memcpy(((char *)ecryptfs_page_virt
172 + start_offset_in_page),
173 (data + data_offset), num_bytes);
174 data_offset += num_bytes;
175 } else {
176 /* We are extending past the previous end of the file.
177 * Fill in zero values up to the start of where we
178 * will be writing data. */
179 memset(((char *)ecryptfs_page_virt
180 + start_offset_in_page), 0, num_bytes);
181 }
182 kunmap_atomic(ecryptfs_page_virt, KM_USER0);
183 flush_dcache_page(ecryptfs_page);
184 SetPageUptodate(ecryptfs_page);
185 unlock_page(ecryptfs_page);
186 rc = ecryptfs_encrypt_page(ecryptfs_page);
187 page_cache_release(ecryptfs_page);
188 if (rc) {
189 printk(KERN_ERR "%s: Error encrypting "
190 "page; rc = [%d]\n", __FUNCTION__, rc);
191 goto out;
192 }
193 pos += num_bytes;
194 }
195 if ((offset + size) > ecryptfs_file_size) {
196 i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size));
197 rc = ecryptfs_write_inode_size_to_metadata(
198 ecryptfs_file->f_dentry->d_inode);
199 if (rc) {
200 printk(KERN_ERR "Problem with "
201 "ecryptfs_write_inode_size_to_metadata; "
202 "rc = [%d]\n", rc);
203 goto out;
204 }
205 }
206out:
207 return rc;
208}
209
210/**
211 * ecryptfs_read_lower
212 * @data: The read data is stored here by this function
213 * @offset: Byte offset in the lower file from which to read the data
214 * @size: Number of bytes to read from @offset of the lower file and
215 * store into @data
216 * @ecryptfs_inode: The eCryptfs inode
217 *
218 * Read @size bytes of data at byte offset @offset from the lower
219 * inode into memory location @data.
220 *
221 * Returns zero on success; non-zero on error
222 */
223int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
224 struct inode *ecryptfs_inode)
225{
226 struct ecryptfs_inode_info *inode_info =
227 ecryptfs_inode_to_private(ecryptfs_inode);
228 ssize_t octets_read;
229 mm_segment_t fs_save;
230 int rc = 0;
231
232 mutex_lock(&inode_info->lower_file_mutex);
233 BUG_ON(!inode_info->lower_file);
234 inode_info->lower_file->f_pos = offset;
235 fs_save = get_fs();
236 set_fs(get_ds());
237 octets_read = vfs_read(inode_info->lower_file, data, size,
238 &inode_info->lower_file->f_pos);
239 set_fs(fs_save);
240 if (octets_read < 0) {
241 printk(KERN_ERR "%s: octets_read = [%td]; "
242 "expected [%td]\n", __FUNCTION__, octets_read, size);
243 rc = -EINVAL;
244 }
245 mutex_unlock(&inode_info->lower_file_mutex);
246 return rc;
247}
248
249/**
250 * ecryptfs_read_lower_page_segment
251 * @page_for_ecryptfs: The page into which data for eCryptfs will be
252 * written
253 * @offset_in_page: Offset in @page_for_ecryptfs from which to start
254 * writing
255 * @size: The number of bytes to write into @page_for_ecryptfs
256 * @ecryptfs_inode: The eCryptfs inode
257 *
258 * Determines the byte offset in the file for the given page and
259 * offset within the page, maps the page, and makes the call to read
260 * the contents of @page_for_ecryptfs from the lower inode.
261 *
262 * Returns zero on success; non-zero otherwise
263 */
264int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
265 pgoff_t page_index,
266 size_t offset_in_page, size_t size,
267 struct inode *ecryptfs_inode)
268{
269 char *virt;
270 loff_t offset;
271 int rc;
272
273 offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
274 virt = kmap(page_for_ecryptfs);
275 rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
276 kunmap(page_for_ecryptfs);
277 flush_dcache_page(page_for_ecryptfs);
278 return rc;
279}
280
281/**
282 * ecryptfs_read
283 * @data: The virtual address into which to write the data read (and
284 * possibly decrypted) from the lower file
285 * @offset: The offset in the decrypted view of the file from which to
286 * read into @data
287 * @size: The number of bytes to read into @data
288 * @ecryptfs_file: The eCryptfs file from which to read
289 *
290 * Read an arbitrary amount of data from an arbitrary location in the
291 * eCryptfs page cache. This is done on an extent-by-extent basis;
292 * individual extents are decrypted and read from the lower page
293 * cache (via VFS reads). This function takes care of all the
294 * address translation to locations in the lower filesystem.
295 *
296 * Returns zero on success; non-zero otherwise
297 */
298int ecryptfs_read(char *data, loff_t offset, size_t size,
299 struct file *ecryptfs_file)
300{
301 struct page *ecryptfs_page;
302 char *ecryptfs_page_virt;
303 loff_t ecryptfs_file_size =
304 i_size_read(ecryptfs_file->f_dentry->d_inode);
305 loff_t data_offset = 0;
306 loff_t pos;
307 int rc = 0;
308
309 if ((offset + size) > ecryptfs_file_size) {
310 rc = -EINVAL;
311 printk(KERN_ERR "%s: Attempt to read data past the end of the "
312 "file; offset = [%lld]; size = [%td]; "
313 "ecryptfs_file_size = [%lld]\n",
314 __FUNCTION__, offset, size, ecryptfs_file_size);
315 goto out;
316 }
317 pos = offset;
318 while (pos < (offset + size)) {
319 pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
320 size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
321 size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
322 size_t total_remaining_bytes = ((offset + size) - pos);
323
324 if (num_bytes > total_remaining_bytes)
325 num_bytes = total_remaining_bytes;
326 ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file,
327 ecryptfs_page_idx);
328 if (IS_ERR(ecryptfs_page)) {
329 rc = PTR_ERR(ecryptfs_page);
330 printk(KERN_ERR "%s: Error getting page at "
331 "index [%ld] from eCryptfs inode "
332 "mapping; rc = [%d]\n", __FUNCTION__,
333 ecryptfs_page_idx, rc);
334 goto out;
335 }
336 rc = ecryptfs_decrypt_page(ecryptfs_page);
337 if (rc) {
338 printk(KERN_ERR "%s: Error decrypting "
339 "page; rc = [%d]\n", __FUNCTION__, rc);
340 ClearPageUptodate(ecryptfs_page);
341 page_cache_release(ecryptfs_page);
342 goto out;
343 }
344 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
345 memcpy((data + data_offset),
346 ((char *)ecryptfs_page_virt + start_offset_in_page),
347 num_bytes);
348 kunmap_atomic(ecryptfs_page_virt, KM_USER0);
349 flush_dcache_page(ecryptfs_page);
350 SetPageUptodate(ecryptfs_page);
351 unlock_page(ecryptfs_page);
352 page_cache_release(ecryptfs_page);
353 pos += num_bytes;
354 data_offset += num_bytes;
355 }
356out:
357 return rc;
358}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 7b3f0cc09a6f..f8cdab2bee3d 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -27,6 +27,7 @@
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include <linux/key.h> 28#include <linux/key.h>
29#include <linux/seq_file.h> 29#include <linux/seq_file.h>
30#include <linux/file.h>
30#include <linux/crypto.h> 31#include <linux/crypto.h>
31#include "ecryptfs_kernel.h" 32#include "ecryptfs_kernel.h"
32 33
@@ -46,15 +47,16 @@ struct kmem_cache *ecryptfs_inode_info_cache;
46 */ 47 */
47static struct inode *ecryptfs_alloc_inode(struct super_block *sb) 48static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
48{ 49{
49 struct ecryptfs_inode_info *ecryptfs_inode; 50 struct ecryptfs_inode_info *inode_info;
50 struct inode *inode = NULL; 51 struct inode *inode = NULL;
51 52
52 ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, 53 inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL);
53 GFP_KERNEL); 54 if (unlikely(!inode_info))
54 if (unlikely(!ecryptfs_inode))
55 goto out; 55 goto out;
56 ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); 56 ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
57 inode = &ecryptfs_inode->vfs_inode; 57 mutex_init(&inode_info->lower_file_mutex);
58 inode_info->lower_file = NULL;
59 inode = &inode_info->vfs_inode;
58out: 60out:
59 return inode; 61 return inode;
60} 62}
@@ -63,9 +65,10 @@ out:
63 * ecryptfs_destroy_inode 65 * ecryptfs_destroy_inode
64 * @inode: The ecryptfs inode 66 * @inode: The ecryptfs inode
65 * 67 *
66 * This is used during the final destruction of the inode. 68 * This is used during the final destruction of the inode. All
67 * All allocation of memory related to the inode, including allocated 69 * allocation of memory related to the inode, including allocated
68 * memory in the crypt_stat struct, will be released here. 70 * memory in the crypt_stat struct, will be released here. This
71 * function also fput()'s the persistent file for the lower inode.
69 * There should be no chance that this deallocation will be missed. 72 * There should be no chance that this deallocation will be missed.
70 */ 73 */
71static void ecryptfs_destroy_inode(struct inode *inode) 74static void ecryptfs_destroy_inode(struct inode *inode)
@@ -73,7 +76,21 @@ static void ecryptfs_destroy_inode(struct inode *inode)
73 struct ecryptfs_inode_info *inode_info; 76 struct ecryptfs_inode_info *inode_info;
74 77
75 inode_info = ecryptfs_inode_to_private(inode); 78 inode_info = ecryptfs_inode_to_private(inode);
76 ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat); 79 mutex_lock(&inode_info->lower_file_mutex);
80 if (inode_info->lower_file) {
81 struct dentry *lower_dentry =
82 inode_info->lower_file->f_dentry;
83
84 BUG_ON(!lower_dentry);
85 if (lower_dentry->d_inode) {
86 fput(inode_info->lower_file);
87 inode_info->lower_file = NULL;
88 d_drop(lower_dentry);
89 d_delete(lower_dentry);
90 }
91 }
92 mutex_unlock(&inode_info->lower_file_mutex);
93 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
77 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 94 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
78} 95}
79 96
@@ -104,7 +121,7 @@ static void ecryptfs_put_super(struct super_block *sb)
104{ 121{
105 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); 122 struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
106 123
107 ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat); 124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
108 kmem_cache_free(ecryptfs_sb_info_cache, sb_info); 125 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
109 ecryptfs_set_superblock_private(sb, NULL); 126 ecryptfs_set_superblock_private(sb, NULL);
110} 127}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2bf49d7ef841..05d9342bb64e 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -22,7 +22,9 @@
22 */ 22 */
23 23
24#include "ext2.h" 24#include "ext2.h"
25#include <linux/buffer_head.h>
25#include <linux/pagemap.h> 26#include <linux/pagemap.h>
27#include <linux/swap.h>
26 28
27typedef struct ext2_dir_entry_2 ext2_dirent; 29typedef struct ext2_dir_entry_2 ext2_dirent;
28 30
@@ -61,16 +63,25 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr)
61 return last_byte; 63 return last_byte;
62} 64}
63 65
64static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) 66static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
65{ 67{
66 struct inode *dir = page->mapping->host; 68 struct address_space *mapping = page->mapping;
69 struct inode *dir = mapping->host;
67 int err = 0; 70 int err = 0;
71
68 dir->i_version++; 72 dir->i_version++;
69 page->mapping->a_ops->commit_write(NULL, page, from, to); 73 block_write_end(NULL, mapping, pos, len, len, page, NULL);
74
75 if (pos+len > dir->i_size) {
76 i_size_write(dir, pos+len);
77 mark_inode_dirty(dir);
78 }
79
70 if (IS_DIRSYNC(dir)) 80 if (IS_DIRSYNC(dir))
71 err = write_one_page(page, 1); 81 err = write_one_page(page, 1);
72 else 82 else
73 unlock_page(page); 83 unlock_page(page);
84
74 return err; 85 return err;
75} 86}
76 87
@@ -412,16 +423,18 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
412void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, 423void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
413 struct page *page, struct inode *inode) 424 struct page *page, struct inode *inode)
414{ 425{
415 unsigned from = (char *) de - (char *) page_address(page); 426 loff_t pos = page_offset(page) +
416 unsigned to = from + le16_to_cpu(de->rec_len); 427 (char *) de - (char *) page_address(page);
428 unsigned len = le16_to_cpu(de->rec_len);
417 int err; 429 int err;
418 430
419 lock_page(page); 431 lock_page(page);
420 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 432 err = __ext2_write_begin(NULL, page->mapping, pos, len,
433 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
421 BUG_ON(err); 434 BUG_ON(err);
422 de->inode = cpu_to_le32(inode->i_ino); 435 de->inode = cpu_to_le32(inode->i_ino);
423 ext2_set_de_type (de, inode); 436 ext2_set_de_type(de, inode);
424 err = ext2_commit_chunk(page, from, to); 437 err = ext2_commit_chunk(page, pos, len);
425 ext2_put_page(page); 438 ext2_put_page(page);
426 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 439 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
427 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; 440 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
@@ -444,7 +457,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
444 unsigned long npages = dir_pages(dir); 457 unsigned long npages = dir_pages(dir);
445 unsigned long n; 458 unsigned long n;
446 char *kaddr; 459 char *kaddr;
447 unsigned from, to; 460 loff_t pos;
448 int err; 461 int err;
449 462
450 /* 463 /*
@@ -497,9 +510,10 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
497 return -EINVAL; 510 return -EINVAL;
498 511
499got_it: 512got_it:
500 from = (char*)de - (char*)page_address(page); 513 pos = page_offset(page) +
501 to = from + rec_len; 514 (char*)de - (char*)page_address(page);
502 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 515 err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
516 &page, NULL);
503 if (err) 517 if (err)
504 goto out_unlock; 518 goto out_unlock;
505 if (de->inode) { 519 if (de->inode) {
@@ -509,10 +523,10 @@ got_it:
509 de = de1; 523 de = de1;
510 } 524 }
511 de->name_len = namelen; 525 de->name_len = namelen;
512 memcpy (de->name, name, namelen); 526 memcpy(de->name, name, namelen);
513 de->inode = cpu_to_le32(inode->i_ino); 527 de->inode = cpu_to_le32(inode->i_ino);
514 ext2_set_de_type (de, inode); 528 ext2_set_de_type (de, inode);
515 err = ext2_commit_chunk(page, from, to); 529 err = ext2_commit_chunk(page, pos, rec_len);
516 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 530 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
517 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; 531 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
518 mark_inode_dirty(dir); 532 mark_inode_dirty(dir);
@@ -537,6 +551,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
537 char *kaddr = page_address(page); 551 char *kaddr = page_address(page);
538 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); 552 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
539 unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); 553 unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
554 loff_t pos;
540 ext2_dirent * pde = NULL; 555 ext2_dirent * pde = NULL;
541 ext2_dirent * de = (ext2_dirent *) (kaddr + from); 556 ext2_dirent * de = (ext2_dirent *) (kaddr + from);
542 int err; 557 int err;
@@ -553,13 +568,15 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
553 } 568 }
554 if (pde) 569 if (pde)
555 from = (char*)pde - (char*)page_address(page); 570 from = (char*)pde - (char*)page_address(page);
571 pos = page_offset(page) + from;
556 lock_page(page); 572 lock_page(page);
557 err = mapping->a_ops->prepare_write(NULL, page, from, to); 573 err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0,
574 &page, NULL);
558 BUG_ON(err); 575 BUG_ON(err);
559 if (pde) 576 if (pde)
560 pde->rec_len = cpu_to_le16(to-from); 577 pde->rec_len = cpu_to_le16(to - from);
561 dir->inode = 0; 578 dir->inode = 0;
562 err = ext2_commit_chunk(page, from, to); 579 err = ext2_commit_chunk(page, pos, to - from);
563 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 580 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
564 EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; 581 EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
565 mark_inode_dirty(inode); 582 mark_inode_dirty(inode);
@@ -582,7 +599,9 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
582 599
583 if (!page) 600 if (!page)
584 return -ENOMEM; 601 return -ENOMEM;
585 err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); 602
603 err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0,
604 &page, NULL);
586 if (err) { 605 if (err) {
587 unlock_page(page); 606 unlock_page(page);
588 goto fail; 607 goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9fd0ec5ba0d0..a08052d2c008 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -134,6 +134,9 @@ extern void ext2_truncate (struct inode *);
134extern int ext2_setattr (struct dentry *, struct iattr *); 134extern int ext2_setattr (struct dentry *, struct iattr *);
135extern void ext2_set_inode_flags(struct inode *inode); 135extern void ext2_set_inode_flags(struct inode *inode);
136extern void ext2_get_inode_flags(struct ext2_inode_info *); 136extern void ext2_get_inode_flags(struct ext2_inode_info *);
137int __ext2_write_begin(struct file *file, struct address_space *mapping,
138 loff_t pos, unsigned len, unsigned flags,
139 struct page **pagep, void **fsdata);
137 140
138/* ioctl.c */ 141/* ioctl.c */
139extern int ext2_ioctl (struct inode *, struct file *, unsigned int, 142extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0079b2cd5314..1b102a1ccebb 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -642,18 +642,35 @@ ext2_readpages(struct file *file, struct address_space *mapping,
642 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); 642 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
643} 643}
644 644
645int __ext2_write_begin(struct file *file, struct address_space *mapping,
646 loff_t pos, unsigned len, unsigned flags,
647 struct page **pagep, void **fsdata)
648{
649 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
650 ext2_get_block);
651}
652
645static int 653static int
646ext2_prepare_write(struct file *file, struct page *page, 654ext2_write_begin(struct file *file, struct address_space *mapping,
647 unsigned from, unsigned to) 655 loff_t pos, unsigned len, unsigned flags,
656 struct page **pagep, void **fsdata)
648{ 657{
649 return block_prepare_write(page,from,to,ext2_get_block); 658 *pagep = NULL;
659 return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
650} 660}
651 661
652static int 662static int
653ext2_nobh_prepare_write(struct file *file, struct page *page, 663ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
654 unsigned from, unsigned to) 664 loff_t pos, unsigned len, unsigned flags,
665 struct page **pagep, void **fsdata)
655{ 666{
656 return nobh_prepare_write(page,from,to,ext2_get_block); 667 /*
668 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
669 * directory handling code to pass around offsets rather than struct
670 * pages in order to make this work easily.
671 */
672 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
673 ext2_get_block);
657} 674}
658 675
659static int ext2_nobh_writepage(struct page *page, 676static int ext2_nobh_writepage(struct page *page,
@@ -689,8 +706,8 @@ const struct address_space_operations ext2_aops = {
689 .readpages = ext2_readpages, 706 .readpages = ext2_readpages,
690 .writepage = ext2_writepage, 707 .writepage = ext2_writepage,
691 .sync_page = block_sync_page, 708 .sync_page = block_sync_page,
692 .prepare_write = ext2_prepare_write, 709 .write_begin = ext2_write_begin,
693 .commit_write = generic_commit_write, 710 .write_end = generic_write_end,
694 .bmap = ext2_bmap, 711 .bmap = ext2_bmap,
695 .direct_IO = ext2_direct_IO, 712 .direct_IO = ext2_direct_IO,
696 .writepages = ext2_writepages, 713 .writepages = ext2_writepages,
@@ -707,8 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
707 .readpages = ext2_readpages, 724 .readpages = ext2_readpages,
708 .writepage = ext2_nobh_writepage, 725 .writepage = ext2_nobh_writepage,
709 .sync_page = block_sync_page, 726 .sync_page = block_sync_page,
710 .prepare_write = ext2_nobh_prepare_write, 727 .write_begin = ext2_nobh_write_begin,
711 .commit_write = nobh_commit_write, 728 .write_end = nobh_write_end,
712 .bmap = ext2_bmap, 729 .bmap = ext2_bmap,
713 .direct_IO = ext2_direct_IO, 730 .direct_IO = ext2_direct_IO,
714 .writepages = ext2_writepages, 731 .writepages = ext2_writepages,
@@ -925,7 +942,8 @@ void ext2_truncate (struct inode * inode)
925 if (mapping_is_xip(inode->i_mapping)) 942 if (mapping_is_xip(inode->i_mapping))
926 xip_truncate_page(inode->i_mapping, inode->i_size); 943 xip_truncate_page(inode->i_mapping, inode->i_size);
927 else if (test_opt(inode->i_sb, NOBH)) 944 else if (test_opt(inode->i_sb, NOBH))
928 nobh_truncate_page(inode->i_mapping, inode->i_size); 945 nobh_truncate_page(inode->i_mapping,
946 inode->i_size, ext2_get_block);
929 else 947 else
930 block_truncate_page(inode->i_mapping, 948 block_truncate_page(inode->i_mapping,
931 inode->i_size, ext2_get_block); 949 inode->i_size, ext2_get_block);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c00723a99f44..c2c3491b18cf 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -143,7 +143,7 @@ static int ext3_readdir(struct file * filp,
143 sb->s_bdev->bd_inode->i_mapping, 143 sb->s_bdev->bd_inode->i_mapping,
144 &filp->f_ra, filp, 144 &filp->f_ra, filp,
145 index, 1); 145 index, 1);
146 filp->f_ra.prev_index = index; 146 filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
147 bh = ext3_bread(NULL, inode, blk, 0, &err); 147 bh = ext3_bread(NULL, inode, blk, 0, &err);
148 } 148 }
149 149
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index de4e3161e479..2f2b6864db10 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1147,51 +1147,68 @@ static int do_journal_get_write_access(handle_t *handle,
1147 return ext3_journal_get_write_access(handle, bh); 1147 return ext3_journal_get_write_access(handle, bh);
1148} 1148}
1149 1149
1150static int ext3_prepare_write(struct file *file, struct page *page, 1150static int ext3_write_begin(struct file *file, struct address_space *mapping,
1151 unsigned from, unsigned to) 1151 loff_t pos, unsigned len, unsigned flags,
1152 struct page **pagep, void **fsdata)
1152{ 1153{
1153 struct inode *inode = page->mapping->host; 1154 struct inode *inode = mapping->host;
1154 int ret, needed_blocks = ext3_writepage_trans_blocks(inode); 1155 int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
1155 handle_t *handle; 1156 handle_t *handle;
1156 int retries = 0; 1157 int retries = 0;
1158 struct page *page;
1159 pgoff_t index;
1160 unsigned from, to;
1161
1162 index = pos >> PAGE_CACHE_SHIFT;
1163 from = pos & (PAGE_CACHE_SIZE - 1);
1164 to = from + len;
1157 1165
1158retry: 1166retry:
1167 page = __grab_cache_page(mapping, index);
1168 if (!page)
1169 return -ENOMEM;
1170 *pagep = page;
1171
1159 handle = ext3_journal_start(inode, needed_blocks); 1172 handle = ext3_journal_start(inode, needed_blocks);
1160 if (IS_ERR(handle)) { 1173 if (IS_ERR(handle)) {
1174 unlock_page(page);
1175 page_cache_release(page);
1161 ret = PTR_ERR(handle); 1176 ret = PTR_ERR(handle);
1162 goto out; 1177 goto out;
1163 } 1178 }
1164 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) 1179 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1165 ret = nobh_prepare_write(page, from, to, ext3_get_block); 1180 ext3_get_block);
1166 else
1167 ret = block_prepare_write(page, from, to, ext3_get_block);
1168 if (ret) 1181 if (ret)
1169 goto prepare_write_failed; 1182 goto write_begin_failed;
1170 1183
1171 if (ext3_should_journal_data(inode)) { 1184 if (ext3_should_journal_data(inode)) {
1172 ret = walk_page_buffers(handle, page_buffers(page), 1185 ret = walk_page_buffers(handle, page_buffers(page),
1173 from, to, NULL, do_journal_get_write_access); 1186 from, to, NULL, do_journal_get_write_access);
1174 } 1187 }
1175prepare_write_failed: 1188write_begin_failed:
1176 if (ret) 1189 if (ret) {
1177 ext3_journal_stop(handle); 1190 ext3_journal_stop(handle);
1191 unlock_page(page);
1192 page_cache_release(page);
1193 }
1178 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1194 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1179 goto retry; 1195 goto retry;
1180out: 1196out:
1181 return ret; 1197 return ret;
1182} 1198}
1183 1199
1200
1184int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) 1201int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1185{ 1202{
1186 int err = journal_dirty_data(handle, bh); 1203 int err = journal_dirty_data(handle, bh);
1187 if (err) 1204 if (err)
1188 ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1205 ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
1189 bh, handle,err); 1206 bh, handle, err);
1190 return err; 1207 return err;
1191} 1208}
1192 1209
1193/* For commit_write() in data=journal mode */ 1210/* For write_end() in data=journal mode */
1194static int commit_write_fn(handle_t *handle, struct buffer_head *bh) 1211static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1195{ 1212{
1196 if (!buffer_mapped(bh) || buffer_freed(bh)) 1213 if (!buffer_mapped(bh) || buffer_freed(bh))
1197 return 0; 1214 return 0;
@@ -1200,84 +1217,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
1200} 1217}
1201 1218
1202/* 1219/*
1220 * Generic write_end handler for ordered and writeback ext3 journal modes.
1221 * We can't use generic_write_end, because that unlocks the page and we need to
1222 * unlock the page after ext3_journal_stop, but ext3_journal_stop must run
1223 * after block_write_end.
1224 */
1225static int ext3_generic_write_end(struct file *file,
1226 struct address_space *mapping,
1227 loff_t pos, unsigned len, unsigned copied,
1228 struct page *page, void *fsdata)
1229{
1230 struct inode *inode = file->f_mapping->host;
1231
1232 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1233
1234 if (pos+copied > inode->i_size) {
1235 i_size_write(inode, pos+copied);
1236 mark_inode_dirty(inode);
1237 }
1238
1239 return copied;
1240}
1241
1242/*
1203 * We need to pick up the new inode size which generic_commit_write gave us 1243 * We need to pick up the new inode size which generic_commit_write gave us
1204 * `file' can be NULL - eg, when called from page_symlink(). 1244 * `file' can be NULL - eg, when called from page_symlink().
1205 * 1245 *
1206 * ext3 never places buffers on inode->i_mapping->private_list. metadata 1246 * ext3 never places buffers on inode->i_mapping->private_list. metadata
1207 * buffers are managed internally. 1247 * buffers are managed internally.
1208 */ 1248 */
1209static int ext3_ordered_commit_write(struct file *file, struct page *page, 1249static int ext3_ordered_write_end(struct file *file,
1210 unsigned from, unsigned to) 1250 struct address_space *mapping,
1251 loff_t pos, unsigned len, unsigned copied,
1252 struct page *page, void *fsdata)
1211{ 1253{
1212 handle_t *handle = ext3_journal_current_handle(); 1254 handle_t *handle = ext3_journal_current_handle();
1213 struct inode *inode = page->mapping->host; 1255 struct inode *inode = file->f_mapping->host;
1256 unsigned from, to;
1214 int ret = 0, ret2; 1257 int ret = 0, ret2;
1215 1258
1259 from = pos & (PAGE_CACHE_SIZE - 1);
1260 to = from + len;
1261
1216 ret = walk_page_buffers(handle, page_buffers(page), 1262 ret = walk_page_buffers(handle, page_buffers(page),
1217 from, to, NULL, ext3_journal_dirty_data); 1263 from, to, NULL, ext3_journal_dirty_data);
1218 1264
1219 if (ret == 0) { 1265 if (ret == 0) {
1220 /* 1266 /*
1221 * generic_commit_write() will run mark_inode_dirty() if i_size 1267 * generic_write_end() will run mark_inode_dirty() if i_size
1222 * changes. So let's piggyback the i_disksize mark_inode_dirty 1268 * changes. So let's piggyback the i_disksize mark_inode_dirty
1223 * into that. 1269 * into that.
1224 */ 1270 */
1225 loff_t new_i_size; 1271 loff_t new_i_size;
1226 1272
1227 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1273 new_i_size = pos + copied;
1228 if (new_i_size > EXT3_I(inode)->i_disksize) 1274 if (new_i_size > EXT3_I(inode)->i_disksize)
1229 EXT3_I(inode)->i_disksize = new_i_size; 1275 EXT3_I(inode)->i_disksize = new_i_size;
1230 ret = generic_commit_write(file, page, from, to); 1276 copied = ext3_generic_write_end(file, mapping, pos, len, copied,
1277 page, fsdata);
1278 if (copied < 0)
1279 ret = copied;
1231 } 1280 }
1232 ret2 = ext3_journal_stop(handle); 1281 ret2 = ext3_journal_stop(handle);
1233 if (!ret) 1282 if (!ret)
1234 ret = ret2; 1283 ret = ret2;
1235 return ret; 1284 unlock_page(page);
1285 page_cache_release(page);
1286
1287 return ret ? ret : copied;
1236} 1288}
1237 1289
1238static int ext3_writeback_commit_write(struct file *file, struct page *page, 1290static int ext3_writeback_write_end(struct file *file,
1239 unsigned from, unsigned to) 1291 struct address_space *mapping,
1292 loff_t pos, unsigned len, unsigned copied,
1293 struct page *page, void *fsdata)
1240{ 1294{
1241 handle_t *handle = ext3_journal_current_handle(); 1295 handle_t *handle = ext3_journal_current_handle();
1242 struct inode *inode = page->mapping->host; 1296 struct inode *inode = file->f_mapping->host;
1243 int ret = 0, ret2; 1297 int ret = 0, ret2;
1244 loff_t new_i_size; 1298 loff_t new_i_size;
1245 1299
1246 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1300 new_i_size = pos + copied;
1247 if (new_i_size > EXT3_I(inode)->i_disksize) 1301 if (new_i_size > EXT3_I(inode)->i_disksize)
1248 EXT3_I(inode)->i_disksize = new_i_size; 1302 EXT3_I(inode)->i_disksize = new_i_size;
1249 1303
1250 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) 1304 copied = ext3_generic_write_end(file, mapping, pos, len, copied,
1251 ret = nobh_commit_write(file, page, from, to); 1305 page, fsdata);
1252 else 1306 if (copied < 0)
1253 ret = generic_commit_write(file, page, from, to); 1307 ret = copied;
1254 1308
1255 ret2 = ext3_journal_stop(handle); 1309 ret2 = ext3_journal_stop(handle);
1256 if (!ret) 1310 if (!ret)
1257 ret = ret2; 1311 ret = ret2;
1258 return ret; 1312 unlock_page(page);
1313 page_cache_release(page);
1314
1315 return ret ? ret : copied;
1259} 1316}
1260 1317
1261static int ext3_journalled_commit_write(struct file *file, 1318static int ext3_journalled_write_end(struct file *file,
1262 struct page *page, unsigned from, unsigned to) 1319 struct address_space *mapping,
1320 loff_t pos, unsigned len, unsigned copied,
1321 struct page *page, void *fsdata)
1263{ 1322{
1264 handle_t *handle = ext3_journal_current_handle(); 1323 handle_t *handle = ext3_journal_current_handle();
1265 struct inode *inode = page->mapping->host; 1324 struct inode *inode = mapping->host;
1266 int ret = 0, ret2; 1325 int ret = 0, ret2;
1267 int partial = 0; 1326 int partial = 0;
1268 loff_t pos; 1327 unsigned from, to;
1269 1328
1270 /* 1329 from = pos & (PAGE_CACHE_SIZE - 1);
1271 * Here we duplicate the generic_commit_write() functionality 1330 to = from + len;
1272 */ 1331
1273 pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1332 if (copied < len) {
1333 if (!PageUptodate(page))
1334 copied = 0;
1335 page_zero_new_buffers(page, from+copied, to);
1336 }
1274 1337
1275 ret = walk_page_buffers(handle, page_buffers(page), from, 1338 ret = walk_page_buffers(handle, page_buffers(page), from,
1276 to, &partial, commit_write_fn); 1339 to, &partial, write_end_fn);
1277 if (!partial) 1340 if (!partial)
1278 SetPageUptodate(page); 1341 SetPageUptodate(page);
1279 if (pos > inode->i_size) 1342 if (pos+copied > inode->i_size)
1280 i_size_write(inode, pos); 1343 i_size_write(inode, pos+copied);
1281 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; 1344 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
1282 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1345 if (inode->i_size > EXT3_I(inode)->i_disksize) {
1283 EXT3_I(inode)->i_disksize = inode->i_size; 1346 EXT3_I(inode)->i_disksize = inode->i_size;
@@ -1285,10 +1348,14 @@ static int ext3_journalled_commit_write(struct file *file,
1285 if (!ret) 1348 if (!ret)
1286 ret = ret2; 1349 ret = ret2;
1287 } 1350 }
1351
1288 ret2 = ext3_journal_stop(handle); 1352 ret2 = ext3_journal_stop(handle);
1289 if (!ret) 1353 if (!ret)
1290 ret = ret2; 1354 ret = ret2;
1291 return ret; 1355 unlock_page(page);
1356 page_cache_release(page);
1357
1358 return ret ? ret : copied;
1292} 1359}
1293 1360
1294/* 1361/*
@@ -1546,7 +1613,7 @@ static int ext3_journalled_writepage(struct page *page,
1546 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); 1613 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
1547 1614
1548 err = walk_page_buffers(handle, page_buffers(page), 0, 1615 err = walk_page_buffers(handle, page_buffers(page), 0,
1549 PAGE_CACHE_SIZE, NULL, commit_write_fn); 1616 PAGE_CACHE_SIZE, NULL, write_end_fn);
1550 if (ret == 0) 1617 if (ret == 0)
1551 ret = err; 1618 ret = err;
1552 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; 1619 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
@@ -1706,8 +1773,8 @@ static const struct address_space_operations ext3_ordered_aops = {
1706 .readpages = ext3_readpages, 1773 .readpages = ext3_readpages,
1707 .writepage = ext3_ordered_writepage, 1774 .writepage = ext3_ordered_writepage,
1708 .sync_page = block_sync_page, 1775 .sync_page = block_sync_page,
1709 .prepare_write = ext3_prepare_write, 1776 .write_begin = ext3_write_begin,
1710 .commit_write = ext3_ordered_commit_write, 1777 .write_end = ext3_ordered_write_end,
1711 .bmap = ext3_bmap, 1778 .bmap = ext3_bmap,
1712 .invalidatepage = ext3_invalidatepage, 1779 .invalidatepage = ext3_invalidatepage,
1713 .releasepage = ext3_releasepage, 1780 .releasepage = ext3_releasepage,
@@ -1720,8 +1787,8 @@ static const struct address_space_operations ext3_writeback_aops = {
1720 .readpages = ext3_readpages, 1787 .readpages = ext3_readpages,
1721 .writepage = ext3_writeback_writepage, 1788 .writepage = ext3_writeback_writepage,
1722 .sync_page = block_sync_page, 1789 .sync_page = block_sync_page,
1723 .prepare_write = ext3_prepare_write, 1790 .write_begin = ext3_write_begin,
1724 .commit_write = ext3_writeback_commit_write, 1791 .write_end = ext3_writeback_write_end,
1725 .bmap = ext3_bmap, 1792 .bmap = ext3_bmap,
1726 .invalidatepage = ext3_invalidatepage, 1793 .invalidatepage = ext3_invalidatepage,
1727 .releasepage = ext3_releasepage, 1794 .releasepage = ext3_releasepage,
@@ -1734,8 +1801,8 @@ static const struct address_space_operations ext3_journalled_aops = {
1734 .readpages = ext3_readpages, 1801 .readpages = ext3_readpages,
1735 .writepage = ext3_journalled_writepage, 1802 .writepage = ext3_journalled_writepage,
1736 .sync_page = block_sync_page, 1803 .sync_page = block_sync_page,
1737 .prepare_write = ext3_prepare_write, 1804 .write_begin = ext3_write_begin,
1738 .commit_write = ext3_journalled_commit_write, 1805 .write_end = ext3_journalled_write_end,
1739 .set_page_dirty = ext3_journalled_set_page_dirty, 1806 .set_page_dirty = ext3_journalled_set_page_dirty,
1740 .bmap = ext3_bmap, 1807 .bmap = ext3_bmap,
1741 .invalidatepage = ext3_invalidatepage, 1808 .invalidatepage = ext3_invalidatepage,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ab01c04e00c..e11890acfa21 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -142,7 +142,7 @@ static int ext4_readdir(struct file * filp,
142 sb->s_bdev->bd_inode->i_mapping, 142 sb->s_bdev->bd_inode->i_mapping,
143 &filp->f_ra, filp, 143 &filp->f_ra, filp,
144 index, 1); 144 index, 1);
145 filp->f_ra.prev_index = index; 145 filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
146 bh = ext4_bread(NULL, inode, blk, 0, &err); 146 bh = ext4_bread(NULL, inode, blk, 0, &err);
147 } 147 }
148 148
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4848e04a5ed..0df2b1e06d0b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1146,34 +1146,50 @@ static int do_journal_get_write_access(handle_t *handle,
1146 return ext4_journal_get_write_access(handle, bh); 1146 return ext4_journal_get_write_access(handle, bh);
1147} 1147}
1148 1148
1149static int ext4_prepare_write(struct file *file, struct page *page, 1149static int ext4_write_begin(struct file *file, struct address_space *mapping,
1150 unsigned from, unsigned to) 1150 loff_t pos, unsigned len, unsigned flags,
1151 struct page **pagep, void **fsdata)
1151{ 1152{
1152 struct inode *inode = page->mapping->host; 1153 struct inode *inode = mapping->host;
1153 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1154 int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
1154 handle_t *handle; 1155 handle_t *handle;
1155 int retries = 0; 1156 int retries = 0;
1157 struct page *page;
1158 pgoff_t index;
1159 unsigned from, to;
1160
1161 index = pos >> PAGE_CACHE_SHIFT;
1162 from = pos & (PAGE_CACHE_SIZE - 1);
1163 to = from + len;
1156 1164
1157retry: 1165retry:
1158 handle = ext4_journal_start(inode, needed_blocks); 1166 page = __grab_cache_page(mapping, index);
1159 if (IS_ERR(handle)) { 1167 if (!page)
1160 ret = PTR_ERR(handle); 1168 return -ENOMEM;
1161 goto out; 1169 *pagep = page;
1170
1171 handle = ext4_journal_start(inode, needed_blocks);
1172 if (IS_ERR(handle)) {
1173 unlock_page(page);
1174 page_cache_release(page);
1175 ret = PTR_ERR(handle);
1176 goto out;
1162 } 1177 }
1163 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
1164 ret = nobh_prepare_write(page, from, to, ext4_get_block);
1165 else
1166 ret = block_prepare_write(page, from, to, ext4_get_block);
1167 if (ret)
1168 goto prepare_write_failed;
1169 1178
1170 if (ext4_should_journal_data(inode)) { 1179 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1180 ext4_get_block);
1181
1182 if (!ret && ext4_should_journal_data(inode)) {
1171 ret = walk_page_buffers(handle, page_buffers(page), 1183 ret = walk_page_buffers(handle, page_buffers(page),
1172 from, to, NULL, do_journal_get_write_access); 1184 from, to, NULL, do_journal_get_write_access);
1173 } 1185 }
1174prepare_write_failed: 1186
1175 if (ret) 1187 if (ret) {
1176 ext4_journal_stop(handle); 1188 ext4_journal_stop(handle);
1189 unlock_page(page);
1190 page_cache_release(page);
1191 }
1192
1177 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1193 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
1178 goto retry; 1194 goto retry;
1179out: 1195out:
@@ -1185,12 +1201,12 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1185 int err = jbd2_journal_dirty_data(handle, bh); 1201 int err = jbd2_journal_dirty_data(handle, bh);
1186 if (err) 1202 if (err)
1187 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1203 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
1188 bh, handle,err); 1204 bh, handle, err);
1189 return err; 1205 return err;
1190} 1206}
1191 1207
1192/* For commit_write() in data=journal mode */ 1208/* For write_end() in data=journal mode */
1193static int commit_write_fn(handle_t *handle, struct buffer_head *bh) 1209static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1194{ 1210{
1195 if (!buffer_mapped(bh) || buffer_freed(bh)) 1211 if (!buffer_mapped(bh) || buffer_freed(bh))
1196 return 0; 1212 return 0;
@@ -1199,84 +1215,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
1199} 1215}
1200 1216
1201/* 1217/*
1218 * Generic write_end handler for ordered and writeback ext4 journal modes.
1219 * We can't use generic_write_end, because that unlocks the page and we need to
1220 * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
1221 * after block_write_end.
1222 */
1223static int ext4_generic_write_end(struct file *file,
1224 struct address_space *mapping,
1225 loff_t pos, unsigned len, unsigned copied,
1226 struct page *page, void *fsdata)
1227{
1228 struct inode *inode = file->f_mapping->host;
1229
1230 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1231
1232 if (pos+copied > inode->i_size) {
1233 i_size_write(inode, pos+copied);
1234 mark_inode_dirty(inode);
1235 }
1236
1237 return copied;
1238}
1239
1240/*
1202 * We need to pick up the new inode size which generic_commit_write gave us 1241 * We need to pick up the new inode size which generic_commit_write gave us
1203 * `file' can be NULL - eg, when called from page_symlink(). 1242 * `file' can be NULL - eg, when called from page_symlink().
1204 * 1243 *
1205 * ext4 never places buffers on inode->i_mapping->private_list. metadata 1244 * ext4 never places buffers on inode->i_mapping->private_list. metadata
1206 * buffers are managed internally. 1245 * buffers are managed internally.
1207 */ 1246 */
1208static int ext4_ordered_commit_write(struct file *file, struct page *page, 1247static int ext4_ordered_write_end(struct file *file,
1209 unsigned from, unsigned to) 1248 struct address_space *mapping,
1249 loff_t pos, unsigned len, unsigned copied,
1250 struct page *page, void *fsdata)
1210{ 1251{
1211 handle_t *handle = ext4_journal_current_handle(); 1252 handle_t *handle = ext4_journal_current_handle();
1212 struct inode *inode = page->mapping->host; 1253 struct inode *inode = file->f_mapping->host;
1254 unsigned from, to;
1213 int ret = 0, ret2; 1255 int ret = 0, ret2;
1214 1256
1257 from = pos & (PAGE_CACHE_SIZE - 1);
1258 to = from + len;
1259
1215 ret = walk_page_buffers(handle, page_buffers(page), 1260 ret = walk_page_buffers(handle, page_buffers(page),
1216 from, to, NULL, ext4_journal_dirty_data); 1261 from, to, NULL, ext4_journal_dirty_data);
1217 1262
1218 if (ret == 0) { 1263 if (ret == 0) {
1219 /* 1264 /*
1220 * generic_commit_write() will run mark_inode_dirty() if i_size 1265 * generic_write_end() will run mark_inode_dirty() if i_size
1221 * changes. So let's piggyback the i_disksize mark_inode_dirty 1266 * changes. So let's piggyback the i_disksize mark_inode_dirty
1222 * into that. 1267 * into that.
1223 */ 1268 */
1224 loff_t new_i_size; 1269 loff_t new_i_size;
1225 1270
1226 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1271 new_i_size = pos + copied;
1227 if (new_i_size > EXT4_I(inode)->i_disksize) 1272 if (new_i_size > EXT4_I(inode)->i_disksize)
1228 EXT4_I(inode)->i_disksize = new_i_size; 1273 EXT4_I(inode)->i_disksize = new_i_size;
1229 ret = generic_commit_write(file, page, from, to); 1274 copied = ext4_generic_write_end(file, mapping, pos, len, copied,
1275 page, fsdata);
1276 if (copied < 0)
1277 ret = copied;
1230 } 1278 }
1231 ret2 = ext4_journal_stop(handle); 1279 ret2 = ext4_journal_stop(handle);
1232 if (!ret) 1280 if (!ret)
1233 ret = ret2; 1281 ret = ret2;
1234 return ret; 1282 unlock_page(page);
1283 page_cache_release(page);
1284
1285 return ret ? ret : copied;
1235} 1286}
1236 1287
1237static int ext4_writeback_commit_write(struct file *file, struct page *page, 1288static int ext4_writeback_write_end(struct file *file,
1238 unsigned from, unsigned to) 1289 struct address_space *mapping,
1290 loff_t pos, unsigned len, unsigned copied,
1291 struct page *page, void *fsdata)
1239{ 1292{
1240 handle_t *handle = ext4_journal_current_handle(); 1293 handle_t *handle = ext4_journal_current_handle();
1241 struct inode *inode = page->mapping->host; 1294 struct inode *inode = file->f_mapping->host;
1242 int ret = 0, ret2; 1295 int ret = 0, ret2;
1243 loff_t new_i_size; 1296 loff_t new_i_size;
1244 1297
1245 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1298 new_i_size = pos + copied;
1246 if (new_i_size > EXT4_I(inode)->i_disksize) 1299 if (new_i_size > EXT4_I(inode)->i_disksize)
1247 EXT4_I(inode)->i_disksize = new_i_size; 1300 EXT4_I(inode)->i_disksize = new_i_size;
1248 1301
1249 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 1302 copied = ext4_generic_write_end(file, mapping, pos, len, copied,
1250 ret = nobh_commit_write(file, page, from, to); 1303 page, fsdata);
1251 else 1304 if (copied < 0)
1252 ret = generic_commit_write(file, page, from, to); 1305 ret = copied;
1253 1306
1254 ret2 = ext4_journal_stop(handle); 1307 ret2 = ext4_journal_stop(handle);
1255 if (!ret) 1308 if (!ret)
1256 ret = ret2; 1309 ret = ret2;
1257 return ret; 1310 unlock_page(page);
1311 page_cache_release(page);
1312
1313 return ret ? ret : copied;
1258} 1314}
1259 1315
1260static int ext4_journalled_commit_write(struct file *file, 1316static int ext4_journalled_write_end(struct file *file,
1261 struct page *page, unsigned from, unsigned to) 1317 struct address_space *mapping,
1318 loff_t pos, unsigned len, unsigned copied,
1319 struct page *page, void *fsdata)
1262{ 1320{
1263 handle_t *handle = ext4_journal_current_handle(); 1321 handle_t *handle = ext4_journal_current_handle();
1264 struct inode *inode = page->mapping->host; 1322 struct inode *inode = mapping->host;
1265 int ret = 0, ret2; 1323 int ret = 0, ret2;
1266 int partial = 0; 1324 int partial = 0;
1267 loff_t pos; 1325 unsigned from, to;
1268 1326
1269 /* 1327 from = pos & (PAGE_CACHE_SIZE - 1);
1270 * Here we duplicate the generic_commit_write() functionality 1328 to = from + len;
1271 */ 1329
1272 pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1330 if (copied < len) {
1331 if (!PageUptodate(page))
1332 copied = 0;
1333 page_zero_new_buffers(page, from+copied, to);
1334 }
1273 1335
1274 ret = walk_page_buffers(handle, page_buffers(page), from, 1336 ret = walk_page_buffers(handle, page_buffers(page), from,
1275 to, &partial, commit_write_fn); 1337 to, &partial, write_end_fn);
1276 if (!partial) 1338 if (!partial)
1277 SetPageUptodate(page); 1339 SetPageUptodate(page);
1278 if (pos > inode->i_size) 1340 if (pos+copied > inode->i_size)
1279 i_size_write(inode, pos); 1341 i_size_write(inode, pos+copied);
1280 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1342 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
1281 if (inode->i_size > EXT4_I(inode)->i_disksize) { 1343 if (inode->i_size > EXT4_I(inode)->i_disksize) {
1282 EXT4_I(inode)->i_disksize = inode->i_size; 1344 EXT4_I(inode)->i_disksize = inode->i_size;
@@ -1284,10 +1346,14 @@ static int ext4_journalled_commit_write(struct file *file,
1284 if (!ret) 1346 if (!ret)
1285 ret = ret2; 1347 ret = ret2;
1286 } 1348 }
1349
1287 ret2 = ext4_journal_stop(handle); 1350 ret2 = ext4_journal_stop(handle);
1288 if (!ret) 1351 if (!ret)
1289 ret = ret2; 1352 ret = ret2;
1290 return ret; 1353 unlock_page(page);
1354 page_cache_release(page);
1355
1356 return ret ? ret : copied;
1291} 1357}
1292 1358
1293/* 1359/*
@@ -1545,7 +1611,7 @@ static int ext4_journalled_writepage(struct page *page,
1545 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); 1611 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
1546 1612
1547 err = walk_page_buffers(handle, page_buffers(page), 0, 1613 err = walk_page_buffers(handle, page_buffers(page), 0,
1548 PAGE_CACHE_SIZE, NULL, commit_write_fn); 1614 PAGE_CACHE_SIZE, NULL, write_end_fn);
1549 if (ret == 0) 1615 if (ret == 0)
1550 ret = err; 1616 ret = err;
1551 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1617 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
@@ -1705,8 +1771,8 @@ static const struct address_space_operations ext4_ordered_aops = {
1705 .readpages = ext4_readpages, 1771 .readpages = ext4_readpages,
1706 .writepage = ext4_ordered_writepage, 1772 .writepage = ext4_ordered_writepage,
1707 .sync_page = block_sync_page, 1773 .sync_page = block_sync_page,
1708 .prepare_write = ext4_prepare_write, 1774 .write_begin = ext4_write_begin,
1709 .commit_write = ext4_ordered_commit_write, 1775 .write_end = ext4_ordered_write_end,
1710 .bmap = ext4_bmap, 1776 .bmap = ext4_bmap,
1711 .invalidatepage = ext4_invalidatepage, 1777 .invalidatepage = ext4_invalidatepage,
1712 .releasepage = ext4_releasepage, 1778 .releasepage = ext4_releasepage,
@@ -1719,8 +1785,8 @@ static const struct address_space_operations ext4_writeback_aops = {
1719 .readpages = ext4_readpages, 1785 .readpages = ext4_readpages,
1720 .writepage = ext4_writeback_writepage, 1786 .writepage = ext4_writeback_writepage,
1721 .sync_page = block_sync_page, 1787 .sync_page = block_sync_page,
1722 .prepare_write = ext4_prepare_write, 1788 .write_begin = ext4_write_begin,
1723 .commit_write = ext4_writeback_commit_write, 1789 .write_end = ext4_writeback_write_end,
1724 .bmap = ext4_bmap, 1790 .bmap = ext4_bmap,
1725 .invalidatepage = ext4_invalidatepage, 1791 .invalidatepage = ext4_invalidatepage,
1726 .releasepage = ext4_releasepage, 1792 .releasepage = ext4_releasepage,
@@ -1733,8 +1799,8 @@ static const struct address_space_operations ext4_journalled_aops = {
1733 .readpages = ext4_readpages, 1799 .readpages = ext4_readpages,
1734 .writepage = ext4_journalled_writepage, 1800 .writepage = ext4_journalled_writepage,
1735 .sync_page = block_sync_page, 1801 .sync_page = block_sync_page,
1736 .prepare_write = ext4_prepare_write, 1802 .write_begin = ext4_write_begin,
1737 .commit_write = ext4_journalled_commit_write, 1803 .write_end = ext4_journalled_write_end,
1738 .set_page_dirty = ext4_journalled_set_page_dirty, 1804 .set_page_dirty = ext4_journalled_set_page_dirty,
1739 .bmap = ext4_bmap, 1805 .bmap = ext4_bmap,
1740 .invalidatepage = ext4_invalidatepage, 1806 .invalidatepage = ext4_invalidatepage,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4baa5f205368..46b8a67f55c6 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -141,19 +141,24 @@ static int fat_readpages(struct file *file, struct address_space *mapping,
141 return mpage_readpages(mapping, pages, nr_pages, fat_get_block); 141 return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
142} 142}
143 143
144static int fat_prepare_write(struct file *file, struct page *page, 144static int fat_write_begin(struct file *file, struct address_space *mapping,
145 unsigned from, unsigned to) 145 loff_t pos, unsigned len, unsigned flags,
146 struct page **pagep, void **fsdata)
146{ 147{
147 return cont_prepare_write(page, from, to, fat_get_block, 148 *pagep = NULL;
148 &MSDOS_I(page->mapping->host)->mmu_private); 149 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
150 fat_get_block,
151 &MSDOS_I(mapping->host)->mmu_private);
149} 152}
150 153
151static int fat_commit_write(struct file *file, struct page *page, 154static int fat_write_end(struct file *file, struct address_space *mapping,
152 unsigned from, unsigned to) 155 loff_t pos, unsigned len, unsigned copied,
156 struct page *pagep, void *fsdata)
153{ 157{
154 struct inode *inode = page->mapping->host; 158 struct inode *inode = mapping->host;
155 int err = generic_commit_write(file, page, from, to); 159 int err;
156 if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { 160 err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
161 if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
157 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 162 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
158 MSDOS_I(inode)->i_attrs |= ATTR_ARCH; 163 MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
159 mark_inode_dirty(inode); 164 mark_inode_dirty(inode);
@@ -202,8 +207,8 @@ static const struct address_space_operations fat_aops = {
202 .writepage = fat_writepage, 207 .writepage = fat_writepage,
203 .writepages = fat_writepages, 208 .writepages = fat_writepages,
204 .sync_page = block_sync_page, 209 .sync_page = block_sync_page,
205 .prepare_write = fat_prepare_write, 210 .write_begin = fat_write_begin,
206 .commit_write = fat_commit_write, 211 .write_end = fat_write_end,
207 .direct_IO = fat_direct_IO, 212 .direct_IO = fat_direct_IO,
208 .bmap = _fat_bmap 213 .bmap = _fat_bmap
209}; 214};
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f79de7c8cdfa..11f22a3d728a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -444,22 +444,25 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
444 return outarg.size; 444 return outarg.size;
445} 445}
446 446
447static int fuse_prepare_write(struct file *file, struct page *page, 447static int fuse_write_begin(struct file *file, struct address_space *mapping,
448 unsigned offset, unsigned to) 448 loff_t pos, unsigned len, unsigned flags,
449 struct page **pagep, void **fsdata)
449{ 450{
450 /* No op */ 451 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
452
453 *pagep = __grab_cache_page(mapping, index);
454 if (!*pagep)
455 return -ENOMEM;
451 return 0; 456 return 0;
452} 457}
453 458
454static int fuse_commit_write(struct file *file, struct page *page, 459static int fuse_buffered_write(struct file *file, struct inode *inode,
455 unsigned offset, unsigned to) 460 loff_t pos, unsigned count, struct page *page)
456{ 461{
457 int err; 462 int err;
458 size_t nres; 463 size_t nres;
459 unsigned count = to - offset;
460 struct inode *inode = page->mapping->host;
461 struct fuse_conn *fc = get_fuse_conn(inode); 464 struct fuse_conn *fc = get_fuse_conn(inode);
462 loff_t pos = page_offset(page) + offset; 465 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
463 struct fuse_req *req; 466 struct fuse_req *req;
464 467
465 if (is_bad_inode(inode)) 468 if (is_bad_inode(inode))
@@ -475,20 +478,35 @@ static int fuse_commit_write(struct file *file, struct page *page,
475 nres = fuse_send_write(req, file, inode, pos, count); 478 nres = fuse_send_write(req, file, inode, pos, count);
476 err = req->out.h.error; 479 err = req->out.h.error;
477 fuse_put_request(fc, req); 480 fuse_put_request(fc, req);
478 if (!err && nres != count) 481 if (!err && !nres)
479 err = -EIO; 482 err = -EIO;
480 if (!err) { 483 if (!err) {
481 pos += count; 484 pos += nres;
482 spin_lock(&fc->lock); 485 spin_lock(&fc->lock);
483 if (pos > inode->i_size) 486 if (pos > inode->i_size)
484 i_size_write(inode, pos); 487 i_size_write(inode, pos);
485 spin_unlock(&fc->lock); 488 spin_unlock(&fc->lock);
486 489
487 if (offset == 0 && to == PAGE_CACHE_SIZE) 490 if (count == PAGE_CACHE_SIZE)
488 SetPageUptodate(page); 491 SetPageUptodate(page);
489 } 492 }
490 fuse_invalidate_attr(inode); 493 fuse_invalidate_attr(inode);
491 return err; 494 return err ? err : nres;
495}
496
497static int fuse_write_end(struct file *file, struct address_space *mapping,
498 loff_t pos, unsigned len, unsigned copied,
499 struct page *page, void *fsdata)
500{
501 struct inode *inode = mapping->host;
502 int res = 0;
503
504 if (copied)
505 res = fuse_buffered_write(file, inode, pos, copied, page);
506
507 unlock_page(page);
508 page_cache_release(page);
509 return res;
492} 510}
493 511
494static void fuse_release_user_pages(struct fuse_req *req, int write) 512static void fuse_release_user_pages(struct fuse_req *req, int write)
@@ -819,8 +837,8 @@ static const struct file_operations fuse_direct_io_file_operations = {
819 837
820static const struct address_space_operations fuse_file_aops = { 838static const struct address_space_operations fuse_file_aops = {
821 .readpage = fuse_readpage, 839 .readpage = fuse_readpage,
822 .prepare_write = fuse_prepare_write, 840 .write_begin = fuse_write_begin,
823 .commit_write = fuse_commit_write, 841 .write_end = fuse_write_end,
824 .readpages = fuse_readpages, 842 .readpages = fuse_readpages,
825 .set_page_dirty = fuse_set_page_dirty, 843 .set_page_dirty = fuse_set_page_dirty,
826 .bmap = fuse_bmap, 844 .bmap = fuse_bmap,
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 873a511ef2be..9679f8b9870d 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -17,6 +17,7 @@
17#include <linux/mpage.h> 17#include <linux/mpage.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/writeback.h> 19#include <linux/writeback.h>
20#include <linux/swap.h>
20#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
21#include <linux/lm_interface.h> 22#include <linux/lm_interface.h>
22 23
@@ -349,45 +350,49 @@ out_unlock:
349} 350}
350 351
351/** 352/**
352 * gfs2_prepare_write - Prepare to write a page to a file 353 * gfs2_write_begin - Begin to write to a file
353 * @file: The file to write to 354 * @file: The file to write to
354 * @page: The page which is to be prepared for writing 355 * @mapping: The mapping in which to write
355 * @from: From (byte range within page) 356 * @pos: The file offset at which to start writing
356 * @to: To (byte range within page) 357 * @len: Length of the write
358 * @flags: Various flags
359 * @pagep: Pointer to return the page
360 * @fsdata: Pointer to return fs data (unused by GFS2)
357 * 361 *
358 * Returns: errno 362 * Returns: errno
359 */ 363 */
360 364
361static int gfs2_prepare_write(struct file *file, struct page *page, 365static int gfs2_write_begin(struct file *file, struct address_space *mapping,
362 unsigned from, unsigned to) 366 loff_t pos, unsigned len, unsigned flags,
367 struct page **pagep, void **fsdata)
363{ 368{
364 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 369 struct gfs2_inode *ip = GFS2_I(mapping->host);
365 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 370 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
366 unsigned int data_blocks, ind_blocks, rblocks; 371 unsigned int data_blocks, ind_blocks, rblocks;
367 int alloc_required; 372 int alloc_required;
368 int error = 0; 373 int error = 0;
369 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
370 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
371 struct gfs2_alloc *al; 374 struct gfs2_alloc *al;
372 unsigned int write_len = to - from; 375 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
376 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
377 unsigned to = from + len;
378 struct page *page;
373 379
374 380 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
375 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
376 error = gfs2_glock_nq_atime(&ip->i_gh); 381 error = gfs2_glock_nq_atime(&ip->i_gh);
377 if (unlikely(error)) { 382 if (unlikely(error))
378 if (error == GLR_TRYFAILED) {
379 unlock_page(page);
380 error = AOP_TRUNCATED_PAGE;
381 yield();
382 }
383 goto out_uninit; 383 goto out_uninit;
384 }
385 384
386 gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks); 385 error = -ENOMEM;
386 page = __grab_cache_page(mapping, index);
387 *pagep = page;
388 if (!page)
389 goto out_unlock;
390
391 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
387 392
388 error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required); 393 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
389 if (error) 394 if (error)
390 goto out_unlock; 395 goto out_putpage;
391 396
392 397
393 ip->i_alloc.al_requested = 0; 398 ip->i_alloc.al_requested = 0;
@@ -420,7 +425,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
420 goto out_trans_fail; 425 goto out_trans_fail;
421 426
422 if (gfs2_is_stuffed(ip)) { 427 if (gfs2_is_stuffed(ip)) {
423 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { 428 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
424 error = gfs2_unstuff_dinode(ip, page); 429 error = gfs2_unstuff_dinode(ip, page);
425 if (error == 0) 430 if (error == 0)
426 goto prepare_write; 431 goto prepare_write;
@@ -443,6 +448,10 @@ out_qunlock:
443out_alloc_put: 448out_alloc_put:
444 gfs2_alloc_put(ip); 449 gfs2_alloc_put(ip);
445 } 450 }
451out_putpage:
452 page_cache_release(page);
453 if (pos + len > ip->i_inode.i_size)
454 vmtruncate(&ip->i_inode, ip->i_inode.i_size);
446out_unlock: 455out_unlock:
447 gfs2_glock_dq_m(1, &ip->i_gh); 456 gfs2_glock_dq_m(1, &ip->i_gh);
448out_uninit: 457out_uninit:
@@ -478,65 +487,117 @@ static void adjust_fs_space(struct inode *inode)
478} 487}
479 488
480/** 489/**
481 * gfs2_commit_write - Commit write to a file 490 * gfs2_stuffed_write_end - Write end for stuffed files
491 * @inode: The inode
492 * @dibh: The buffer_head containing the on-disk inode
493 * @pos: The file position
494 * @len: The length of the write
495 * @copied: How much was actually copied by the VFS
496 * @page: The page
497 *
498 * This copies the data from the page into the inode block after
499 * the inode data structure itself.
500 *
501 * Returns: errno
502 */
503static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
504 loff_t pos, unsigned len, unsigned copied,
505 struct page *page)
506{
507 struct gfs2_inode *ip = GFS2_I(inode);
508 struct gfs2_sbd *sdp = GFS2_SB(inode);
509 u64 to = pos + copied;
510 void *kaddr;
511 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
512 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
513
514 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
515 kaddr = kmap_atomic(page, KM_USER0);
516 memcpy(buf + pos, kaddr + pos, copied);
517 memset(kaddr + pos + copied, 0, len - copied);
518 flush_dcache_page(page);
519 kunmap_atomic(kaddr, KM_USER0);
520
521 if (!PageUptodate(page))
522 SetPageUptodate(page);
523 unlock_page(page);
524 page_cache_release(page);
525
526 if (inode->i_size < to) {
527 i_size_write(inode, to);
528 ip->i_di.di_size = inode->i_size;
529 di->di_size = cpu_to_be64(inode->i_size);
530 mark_inode_dirty(inode);
531 }
532
533 if (inode == sdp->sd_rindex)
534 adjust_fs_space(inode);
535
536 brelse(dibh);
537 gfs2_trans_end(sdp);
538 gfs2_glock_dq(&ip->i_gh);
539 gfs2_holder_uninit(&ip->i_gh);
540 return copied;
541}
542
543/**
544 * gfs2_write_end
482 * @file: The file to write to 545 * @file: The file to write to
483 * @page: The page containing the data 546 * @mapping: The address space to write to
484 * @from: From (byte range within page) 547 * @pos: The file position
485 * @to: To (byte range within page) 548 * @len: The length of the data
549 * @copied:
550 * @page: The page that has been written
551 * @fsdata: The fsdata (unused in GFS2)
552 *
553 * The main write_end function for GFS2. We have a separate one for
554 * stuffed files as they are slightly different, otherwise we just
555 * put our locking around the VFS provided functions.
486 * 556 *
487 * Returns: errno 557 * Returns: errno
488 */ 558 */
489 559
490static int gfs2_commit_write(struct file *file, struct page *page, 560static int gfs2_write_end(struct file *file, struct address_space *mapping,
491 unsigned from, unsigned to) 561 loff_t pos, unsigned len, unsigned copied,
562 struct page *page, void *fsdata)
492{ 563{
493 struct inode *inode = page->mapping->host; 564 struct inode *inode = page->mapping->host;
494 struct gfs2_inode *ip = GFS2_I(inode); 565 struct gfs2_inode *ip = GFS2_I(inode);
495 struct gfs2_sbd *sdp = GFS2_SB(inode); 566 struct gfs2_sbd *sdp = GFS2_SB(inode);
496 int error = -EOPNOTSUPP;
497 struct buffer_head *dibh; 567 struct buffer_head *dibh;
498 struct gfs2_alloc *al = &ip->i_alloc; 568 struct gfs2_alloc *al = &ip->i_alloc;
499 struct gfs2_dinode *di; 569 struct gfs2_dinode *di;
570 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
571 unsigned int to = from + len;
572 int ret;
500 573
501 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) 574 BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0);
502 goto fail_nounlock;
503 575
504 error = gfs2_meta_inode_buffer(ip, &dibh); 576 ret = gfs2_meta_inode_buffer(ip, &dibh);
505 if (error) 577 if (unlikely(ret)) {
506 goto fail_endtrans; 578 unlock_page(page);
579 page_cache_release(page);
580 goto failed;
581 }
507 582
508 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 583 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
509 di = (struct gfs2_dinode *)dibh->b_data;
510
511 if (gfs2_is_stuffed(ip)) {
512 u64 file_size;
513 void *kaddr;
514 584
515 file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to; 585 if (gfs2_is_stuffed(ip))
586 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
516 587
517 kaddr = kmap_atomic(page, KM_USER0); 588 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
518 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, 589 gfs2_page_add_databufs(ip, page, from, to);
519 kaddr + from, to - from);
520 kunmap_atomic(kaddr, KM_USER0);
521 590
522 SetPageUptodate(page); 591 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
523 592
524 if (inode->i_size < file_size) { 593 if (likely(ret >= 0)) {
525 i_size_write(inode, file_size); 594 copied = ret;
595 if ((pos + copied) > inode->i_size) {
596 di = (struct gfs2_dinode *)dibh->b_data;
597 ip->i_di.di_size = inode->i_size;
598 di->di_size = cpu_to_be64(inode->i_size);
526 mark_inode_dirty(inode); 599 mark_inode_dirty(inode);
527 } 600 }
528 } else {
529 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
530 gfs2_is_jdata(ip))
531 gfs2_page_add_databufs(ip, page, from, to);
532 error = generic_commit_write(file, page, from, to);
533 if (error)
534 goto fail;
535 }
536
537 if (ip->i_di.di_size < inode->i_size) {
538 ip->i_di.di_size = inode->i_size;
539 di->di_size = cpu_to_be64(inode->i_size);
540 } 601 }
541 602
542 if (inode == sdp->sd_rindex) 603 if (inode == sdp->sd_rindex)
@@ -544,33 +605,15 @@ static int gfs2_commit_write(struct file *file, struct page *page,
544 605
545 brelse(dibh); 606 brelse(dibh);
546 gfs2_trans_end(sdp); 607 gfs2_trans_end(sdp);
608failed:
547 if (al->al_requested) { 609 if (al->al_requested) {
548 gfs2_inplace_release(ip); 610 gfs2_inplace_release(ip);
549 gfs2_quota_unlock(ip); 611 gfs2_quota_unlock(ip);
550 gfs2_alloc_put(ip); 612 gfs2_alloc_put(ip);
551 } 613 }
552 unlock_page(page); 614 gfs2_glock_dq(&ip->i_gh);
553 gfs2_glock_dq_m(1, &ip->i_gh);
554 lock_page(page);
555 gfs2_holder_uninit(&ip->i_gh); 615 gfs2_holder_uninit(&ip->i_gh);
556 return 0; 616 return ret;
557
558fail:
559 brelse(dibh);
560fail_endtrans:
561 gfs2_trans_end(sdp);
562 if (al->al_requested) {
563 gfs2_inplace_release(ip);
564 gfs2_quota_unlock(ip);
565 gfs2_alloc_put(ip);
566 }
567 unlock_page(page);
568 gfs2_glock_dq_m(1, &ip->i_gh);
569 lock_page(page);
570 gfs2_holder_uninit(&ip->i_gh);
571fail_nounlock:
572 ClearPageUptodate(page);
573 return error;
574} 617}
575 618
576/** 619/**
@@ -799,8 +842,8 @@ const struct address_space_operations gfs2_file_aops = {
799 .readpage = gfs2_readpage, 842 .readpage = gfs2_readpage,
800 .readpages = gfs2_readpages, 843 .readpages = gfs2_readpages,
801 .sync_page = block_sync_page, 844 .sync_page = block_sync_page,
802 .prepare_write = gfs2_prepare_write, 845 .write_begin = gfs2_write_begin,
803 .commit_write = gfs2_commit_write, 846 .write_end = gfs2_write_end,
804 .set_page_dirty = gfs2_set_page_dirty, 847 .set_page_dirty = gfs2_set_page_dirty,
805 .bmap = gfs2_bmap, 848 .bmap = gfs2_bmap,
806 .invalidatepage = gfs2_invalidatepage, 849 .invalidatepage = gfs2_invalidatepage,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 5ea6b3d45eaa..c176f67ba0a5 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -464,23 +464,20 @@ void hfs_file_truncate(struct inode *inode)
464 (long long)HFS_I(inode)->phys_size, inode->i_size); 464 (long long)HFS_I(inode)->phys_size, inode->i_size);
465 if (inode->i_size > HFS_I(inode)->phys_size) { 465 if (inode->i_size > HFS_I(inode)->phys_size) {
466 struct address_space *mapping = inode->i_mapping; 466 struct address_space *mapping = inode->i_mapping;
467 void *fsdata;
467 struct page *page; 468 struct page *page;
468 int res; 469 int res;
469 470
471 /* XXX: Can use generic_cont_expand? */
470 size = inode->i_size - 1; 472 size = inode->i_size - 1;
471 page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); 473 res = pagecache_write_begin(NULL, mapping, size+1, 0,
472 if (!page) 474 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
473 return; 475 if (!res) {
474 size &= PAGE_CACHE_SIZE - 1; 476 res = pagecache_write_end(NULL, mapping, size+1, 0, 0,
475 size++; 477 page, fsdata);
476 res = mapping->a_ops->prepare_write(NULL, page, size, size); 478 }
477 if (!res)
478 res = mapping->a_ops->commit_write(NULL, page, size, size);
479 if (res) 479 if (res)
480 inode->i_size = HFS_I(inode)->phys_size; 480 inode->i_size = HFS_I(inode)->phys_size;
481 unlock_page(page);
482 page_cache_release(page);
483 mark_inode_dirty(inode);
484 return; 481 return;
485 } else if (inode->i_size == HFS_I(inode)->phys_size) 482 } else if (inode->i_size == HFS_I(inode)->phys_size)
486 return; 483 return;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index bc835f272a6e..97f8446c4ff4 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -35,10 +35,14 @@ static int hfs_readpage(struct file *file, struct page *page)
35 return block_read_full_page(page, hfs_get_block); 35 return block_read_full_page(page, hfs_get_block);
36} 36}
37 37
38static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 38static int hfs_write_begin(struct file *file, struct address_space *mapping,
39 loff_t pos, unsigned len, unsigned flags,
40 struct page **pagep, void **fsdata)
39{ 41{
40 return cont_prepare_write(page, from, to, hfs_get_block, 42 *pagep = NULL;
41 &HFS_I(page->mapping->host)->phys_size); 43 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
44 hfs_get_block,
45 &HFS_I(mapping->host)->phys_size);
42} 46}
43 47
44static sector_t hfs_bmap(struct address_space *mapping, sector_t block) 48static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
@@ -119,8 +123,8 @@ const struct address_space_operations hfs_btree_aops = {
119 .readpage = hfs_readpage, 123 .readpage = hfs_readpage,
120 .writepage = hfs_writepage, 124 .writepage = hfs_writepage,
121 .sync_page = block_sync_page, 125 .sync_page = block_sync_page,
122 .prepare_write = hfs_prepare_write, 126 .write_begin = hfs_write_begin,
123 .commit_write = generic_commit_write, 127 .write_end = generic_write_end,
124 .bmap = hfs_bmap, 128 .bmap = hfs_bmap,
125 .releasepage = hfs_releasepage, 129 .releasepage = hfs_releasepage,
126}; 130};
@@ -129,8 +133,8 @@ const struct address_space_operations hfs_aops = {
129 .readpage = hfs_readpage, 133 .readpage = hfs_readpage,
130 .writepage = hfs_writepage, 134 .writepage = hfs_writepage,
131 .sync_page = block_sync_page, 135 .sync_page = block_sync_page,
132 .prepare_write = hfs_prepare_write, 136 .write_begin = hfs_write_begin,
133 .commit_write = generic_commit_write, 137 .write_end = generic_write_end,
134 .bmap = hfs_bmap, 138 .bmap = hfs_bmap,
135 .direct_IO = hfs_direct_IO, 139 .direct_IO = hfs_direct_IO,
136 .writepages = hfs_writepages, 140 .writepages = hfs_writepages,
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 1a7480089e82..12e899cd7886 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -443,21 +443,18 @@ void hfsplus_file_truncate(struct inode *inode)
443 if (inode->i_size > HFSPLUS_I(inode).phys_size) { 443 if (inode->i_size > HFSPLUS_I(inode).phys_size) {
444 struct address_space *mapping = inode->i_mapping; 444 struct address_space *mapping = inode->i_mapping;
445 struct page *page; 445 struct page *page;
446 u32 size = inode->i_size - 1; 446 void *fsdata;
447 u32 size = inode->i_size;
447 int res; 448 int res;
448 449
449 page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); 450 res = pagecache_write_begin(NULL, mapping, size, 0,
450 if (!page) 451 AOP_FLAG_UNINTERRUPTIBLE,
451 return; 452 &page, &fsdata);
452 size &= PAGE_CACHE_SIZE - 1;
453 size++;
454 res = mapping->a_ops->prepare_write(NULL, page, size, size);
455 if (!res)
456 res = mapping->a_ops->commit_write(NULL, page, size, size);
457 if (res) 453 if (res)
458 inode->i_size = HFSPLUS_I(inode).phys_size; 454 return;
459 unlock_page(page); 455 res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
460 page_cache_release(page); 456 if (res < 0)
457 return;
461 mark_inode_dirty(inode); 458 mark_inode_dirty(inode);
462 return; 459 return;
463 } else if (inode->i_size == HFSPLUS_I(inode).phys_size) 460 } else if (inode->i_size == HFSPLUS_I(inode).phys_size)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6f7c662174db..37744cf3706a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -27,10 +27,14 @@ static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)
27 return block_write_full_page(page, hfsplus_get_block, wbc); 27 return block_write_full_page(page, hfsplus_get_block, wbc);
28} 28}
29 29
30static int hfsplus_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 30static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
31 loff_t pos, unsigned len, unsigned flags,
32 struct page **pagep, void **fsdata)
31{ 33{
32 return cont_prepare_write(page, from, to, hfsplus_get_block, 34 *pagep = NULL;
33 &HFSPLUS_I(page->mapping->host).phys_size); 35 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
36 hfsplus_get_block,
37 &HFSPLUS_I(mapping->host).phys_size);
34} 38}
35 39
36static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) 40static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
@@ -114,8 +118,8 @@ const struct address_space_operations hfsplus_btree_aops = {
114 .readpage = hfsplus_readpage, 118 .readpage = hfsplus_readpage,
115 .writepage = hfsplus_writepage, 119 .writepage = hfsplus_writepage,
116 .sync_page = block_sync_page, 120 .sync_page = block_sync_page,
117 .prepare_write = hfsplus_prepare_write, 121 .write_begin = hfsplus_write_begin,
118 .commit_write = generic_commit_write, 122 .write_end = generic_write_end,
119 .bmap = hfsplus_bmap, 123 .bmap = hfsplus_bmap,
120 .releasepage = hfsplus_releasepage, 124 .releasepage = hfsplus_releasepage,
121}; 125};
@@ -124,8 +128,8 @@ const struct address_space_operations hfsplus_aops = {
124 .readpage = hfsplus_readpage, 128 .readpage = hfsplus_readpage,
125 .writepage = hfsplus_writepage, 129 .writepage = hfsplus_writepage,
126 .sync_page = block_sync_page, 130 .sync_page = block_sync_page,
127 .prepare_write = hfsplus_prepare_write, 131 .write_begin = hfsplus_write_begin,
128 .commit_write = generic_commit_write, 132 .write_end = generic_write_end,
129 .bmap = hfsplus_bmap, 133 .bmap = hfsplus_bmap,
130 .direct_IO = hfsplus_direct_IO, 134 .direct_IO = hfsplus_direct_IO,
131 .writepages = hfsplus_writepages, 135 .writepages = hfsplus_writepages,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 06e5930515fe..6ae9011b95eb 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -3,7 +3,8 @@
3 3
4#include "os.h" 4#include "os.h"
5 5
6/* These are exactly the same definitions as in fs.h, but the names are 6/*
7 * These are exactly the same definitions as in fs.h, but the names are
7 * changed so that this file can be included in both kernel and user files. 8 * changed so that this file can be included in both kernel and user files.
8 */ 9 */
9 10
@@ -21,7 +22,8 @@
21#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ 22#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */
22#define HOSTFS_ATTR_ATTR_FLAG 1024 23#define HOSTFS_ATTR_ATTR_FLAG 1024
23 24
24/* If you are very careful, you'll notice that these two are missing: 25/*
26 * If you are very careful, you'll notice that these two are missing:
25 * 27 *
26 * #define ATTR_KILL_SUID 2048 28 * #define ATTR_KILL_SUID 2048
27 * #define ATTR_KILL_SGID 4096 29 * #define ATTR_KILL_SGID 4096
@@ -76,7 +78,8 @@ extern int make_symlink(const char *from, const char *to);
76extern int unlink_file(const char *file); 78extern int unlink_file(const char *file);
77extern int do_mkdir(const char *file, int mode); 79extern int do_mkdir(const char *file, int mode);
78extern int do_rmdir(const char *file); 80extern int do_rmdir(const char *file);
79extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor); 81extern int do_mknod(const char *file, int mode, unsigned int major,
82 unsigned int minor);
80extern int link_file(const char *from, const char *to); 83extern int link_file(const char *from, const char *to);
81extern int do_readlink(char *file, char *buf, int size); 84extern int do_readlink(char *file, char *buf, int size);
82extern int rename_file(char *from, char *to); 85extern int rename_file(char *from, char *to);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c77862032e84..8966b050196e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -6,21 +6,14 @@
6 * 2003-02-10 Petr Baudis <pasky@ucw.cz> 6 * 2003-02-10 Petr Baudis <pasky@ucw.cz>
7 */ 7 */
8 8
9#include <linux/stddef.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/module.h> 10#include <linux/module.h>
12#include <linux/init.h> 11#include <linux/mm.h>
13#include <linux/slab.h>
14#include <linux/pagemap.h> 12#include <linux/pagemap.h>
15#include <linux/blkdev.h>
16#include <linux/list.h>
17#include <linux/statfs.h> 13#include <linux/statfs.h>
18#include <linux/kdev_t.h>
19#include <asm/uaccess.h>
20#include "hostfs.h" 14#include "hostfs.h"
21#include "kern_util.h"
22#include "kern.h"
23#include "init.h" 15#include "init.h"
16#include "kern.h"
24 17
25struct hostfs_inode_info { 18struct hostfs_inode_info {
26 char *host_filename; 19 char *host_filename;
@@ -61,18 +54,18 @@ static int __init hostfs_args(char *options, int *add)
61 char *ptr; 54 char *ptr;
62 55
63 ptr = strchr(options, ','); 56 ptr = strchr(options, ',');
64 if(ptr != NULL) 57 if (ptr != NULL)
65 *ptr++ = '\0'; 58 *ptr++ = '\0';
66 if(*options != '\0') 59 if (*options != '\0')
67 root_ino = options; 60 root_ino = options;
68 61
69 options = ptr; 62 options = ptr;
70 while(options){ 63 while (options) {
71 ptr = strchr(options, ','); 64 ptr = strchr(options, ',');
72 if(ptr != NULL) 65 if (ptr != NULL)
73 *ptr++ = '\0'; 66 *ptr++ = '\0';
74 if(*options != '\0'){ 67 if (*options != '\0') {
75 if(!strcmp(options, "append")) 68 if (!strcmp(options, "append"))
76 append = 1; 69 append = 1;
77 else printf("hostfs_args - unsupported option - %s\n", 70 else printf("hostfs_args - unsupported option - %s\n",
78 options); 71 options);
@@ -102,7 +95,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
102 95
103 len = 0; 96 len = 0;
104 parent = dentry; 97 parent = dentry;
105 while(parent->d_parent != parent){ 98 while (parent->d_parent != parent) {
106 len += parent->d_name.len + 1; 99 len += parent->d_name.len + 1;
107 parent = parent->d_parent; 100 parent = parent->d_parent;
108 } 101 }
@@ -110,12 +103,12 @@ static char *dentry_name(struct dentry *dentry, int extra)
110 root = HOSTFS_I(parent->d_inode)->host_filename; 103 root = HOSTFS_I(parent->d_inode)->host_filename;
111 len += strlen(root); 104 len += strlen(root);
112 name = kmalloc(len + extra + 1, GFP_KERNEL); 105 name = kmalloc(len + extra + 1, GFP_KERNEL);
113 if(name == NULL) 106 if (name == NULL)
114 return NULL; 107 return NULL;
115 108
116 name[len] = '\0'; 109 name[len] = '\0';
117 parent = dentry; 110 parent = dentry;
118 while(parent->d_parent != parent){ 111 while (parent->d_parent != parent) {
119 len -= parent->d_name.len + 1; 112 len -= parent->d_name.len + 1;
120 name[len] = '/'; 113 name[len] = '/';
121 strncpy(&name[len + 1], parent->d_name.name, 114 strncpy(&name[len + 1], parent->d_name.name,
@@ -136,7 +129,8 @@ static char *inode_name(struct inode *ino, int extra)
136 129
137static int read_name(struct inode *ino, char *name) 130static int read_name(struct inode *ino, char *name)
138{ 131{
139 /* The non-int inode fields are copied into ints by stat_file and 132 /*
133 * The non-int inode fields are copied into ints by stat_file and
140 * then copied into the inode because passing the actual pointers 134 * then copied into the inode because passing the actual pointers
141 * in and having them treated as int * breaks on big-endian machines 135 * in and having them treated as int * breaks on big-endian machines
142 */ 136 */
@@ -149,7 +143,7 @@ static int read_name(struct inode *ino, char *name)
149 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, 143 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
150 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, 144 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
151 &ino->i_ctime, &i_blksize, &i_blocks, -1); 145 &ino->i_ctime, &i_blksize, &i_blocks, -1);
152 if(err) 146 if (err)
153 return err; 147 return err;
154 148
155 ino->i_ino = i_ino; 149 ino->i_ino = i_ino;
@@ -166,33 +160,33 @@ static char *follow_link(char *link)
166 char *name, *resolved, *end; 160 char *name, *resolved, *end;
167 161
168 len = 64; 162 len = 64;
169 while(1){ 163 while (1) {
170 n = -ENOMEM; 164 n = -ENOMEM;
171 name = kmalloc(len, GFP_KERNEL); 165 name = kmalloc(len, GFP_KERNEL);
172 if(name == NULL) 166 if (name == NULL)
173 goto out; 167 goto out;
174 168
175 n = do_readlink(link, name, len); 169 n = do_readlink(link, name, len);
176 if(n < len) 170 if (n < len)
177 break; 171 break;
178 len *= 2; 172 len *= 2;
179 kfree(name); 173 kfree(name);
180 } 174 }
181 if(n < 0) 175 if (n < 0)
182 goto out_free; 176 goto out_free;
183 177
184 if(*name == '/') 178 if (*name == '/')
185 return name; 179 return name;
186 180
187 end = strrchr(link, '/'); 181 end = strrchr(link, '/');
188 if(end == NULL) 182 if (end == NULL)
189 return name; 183 return name;
190 184
191 *(end + 1) = '\0'; 185 *(end + 1) = '\0';
192 len = strlen(link) + strlen(name) + 1; 186 len = strlen(link) + strlen(name) + 1;
193 187
194 resolved = kmalloc(len, GFP_KERNEL); 188 resolved = kmalloc(len, GFP_KERNEL);
195 if(resolved == NULL){ 189 if (resolved == NULL) {
196 n = -ENOMEM; 190 n = -ENOMEM;
197 goto out_free; 191 goto out_free;
198 } 192 }
@@ -213,20 +207,21 @@ static int read_inode(struct inode *ino)
213 char *name; 207 char *name;
214 int err = 0; 208 int err = 0;
215 209
216 /* Unfortunately, we are called from iget() when we don't have a dentry 210 /*
211 * Unfortunately, we are called from iget() when we don't have a dentry
217 * allocated yet. 212 * allocated yet.
218 */ 213 */
219 if(list_empty(&ino->i_dentry)) 214 if (list_empty(&ino->i_dentry))
220 goto out; 215 goto out;
221 216
222 err = -ENOMEM; 217 err = -ENOMEM;
223 name = inode_name(ino, 0); 218 name = inode_name(ino, 0);
224 if(name == NULL) 219 if (name == NULL)
225 goto out; 220 goto out;
226 221
227 if(file_type(name, NULL, NULL) == OS_TYPE_SYMLINK){ 222 if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
228 name = follow_link(name); 223 name = follow_link(name);
229 if(IS_ERR(name)){ 224 if (IS_ERR(name)) {
230 err = PTR_ERR(name); 225 err = PTR_ERR(name);
231 goto out; 226 goto out;
232 } 227 }
@@ -240,7 +235,8 @@ static int read_inode(struct inode *ino)
240 235
241int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) 236int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
242{ 237{
243 /* do_statfs uses struct statfs64 internally, but the linux kernel 238 /*
239 * do_statfs uses struct statfs64 internally, but the linux kernel
244 * struct statfs still has 32-bit versions for most of these fields, 240 * struct statfs still has 32-bit versions for most of these fields,
245 * so we convert them here 241 * so we convert them here
246 */ 242 */
@@ -255,7 +251,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
255 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 251 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
256 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 252 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
257 &sf->f_namelen, sf->f_spare); 253 &sf->f_namelen, sf->f_spare);
258 if(err) 254 if (err)
259 return err; 255 return err;
260 sf->f_blocks = f_blocks; 256 sf->f_blocks = f_blocks;
261 sf->f_bfree = f_bfree; 257 sf->f_bfree = f_bfree;
@@ -271,7 +267,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
271 struct hostfs_inode_info *hi; 267 struct hostfs_inode_info *hi;
272 268
273 hi = kmalloc(sizeof(*hi), GFP_KERNEL); 269 hi = kmalloc(sizeof(*hi), GFP_KERNEL);
274 if(hi == NULL) 270 if (hi == NULL)
275 return NULL; 271 return NULL;
276 272
277 *hi = ((struct hostfs_inode_info) { .host_filename = NULL, 273 *hi = ((struct hostfs_inode_info) { .host_filename = NULL,
@@ -284,7 +280,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
284static void hostfs_delete_inode(struct inode *inode) 280static void hostfs_delete_inode(struct inode *inode)
285{ 281{
286 truncate_inode_pages(&inode->i_data, 0); 282 truncate_inode_pages(&inode->i_data, 0);
287 if(HOSTFS_I(inode)->fd != -1) { 283 if (HOSTFS_I(inode)->fd != -1) {
288 close_file(&HOSTFS_I(inode)->fd); 284 close_file(&HOSTFS_I(inode)->fd);
289 HOSTFS_I(inode)->fd = -1; 285 HOSTFS_I(inode)->fd = -1;
290 } 286 }
@@ -295,9 +291,11 @@ static void hostfs_destroy_inode(struct inode *inode)
295{ 291{
296 kfree(HOSTFS_I(inode)->host_filename); 292 kfree(HOSTFS_I(inode)->host_filename);
297 293
298 /*XXX: This should not happen, probably. The check is here for 294 /*
299 * additional safety.*/ 295 * XXX: This should not happen, probably. The check is here for
300 if(HOSTFS_I(inode)->fd != -1) { 296 * additional safety.
297 */
298 if (HOSTFS_I(inode)->fd != -1) {
301 close_file(&HOSTFS_I(inode)->fd); 299 close_file(&HOSTFS_I(inode)->fd);
302 printk(KERN_DEBUG "Closing host fd in .destroy_inode\n"); 300 printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
303 } 301 }
@@ -327,17 +325,17 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
327 int error, len; 325 int error, len;
328 326
329 name = dentry_name(file->f_path.dentry, 0); 327 name = dentry_name(file->f_path.dentry, 0);
330 if(name == NULL) 328 if (name == NULL)
331 return -ENOMEM; 329 return -ENOMEM;
332 dir = open_dir(name, &error); 330 dir = open_dir(name, &error);
333 kfree(name); 331 kfree(name);
334 if(dir == NULL) 332 if (dir == NULL)
335 return -error; 333 return -error;
336 next = file->f_pos; 334 next = file->f_pos;
337 while((name = read_dir(dir, &next, &ino, &len)) != NULL){ 335 while ((name = read_dir(dir, &next, &ino, &len)) != NULL) {
338 error = (*filldir)(ent, name, len, file->f_pos, 336 error = (*filldir)(ent, name, len, file->f_pos,
339 ino, DT_UNKNOWN); 337 ino, DT_UNKNOWN);
340 if(error) break; 338 if (error) break;
341 file->f_pos = next; 339 file->f_pos = next;
342 } 340 }
343 close_dir(dir); 341 close_dir(dir);
@@ -350,32 +348,33 @@ int hostfs_file_open(struct inode *ino, struct file *file)
350 int mode = 0, r = 0, w = 0, fd; 348 int mode = 0, r = 0, w = 0, fd;
351 349
352 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 350 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
353 if((mode & HOSTFS_I(ino)->mode) == mode) 351 if ((mode & HOSTFS_I(ino)->mode) == mode)
354 return 0; 352 return 0;
355 353
356 /* The file may already have been opened, but with the wrong access, 354 /*
355 * The file may already have been opened, but with the wrong access,
357 * so this resets things and reopens the file with the new access. 356 * so this resets things and reopens the file with the new access.
358 */ 357 */
359 if(HOSTFS_I(ino)->fd != -1){ 358 if (HOSTFS_I(ino)->fd != -1) {
360 close_file(&HOSTFS_I(ino)->fd); 359 close_file(&HOSTFS_I(ino)->fd);
361 HOSTFS_I(ino)->fd = -1; 360 HOSTFS_I(ino)->fd = -1;
362 } 361 }
363 362
364 HOSTFS_I(ino)->mode |= mode; 363 HOSTFS_I(ino)->mode |= mode;
365 if(HOSTFS_I(ino)->mode & FMODE_READ) 364 if (HOSTFS_I(ino)->mode & FMODE_READ)
366 r = 1; 365 r = 1;
367 if(HOSTFS_I(ino)->mode & FMODE_WRITE) 366 if (HOSTFS_I(ino)->mode & FMODE_WRITE)
368 w = 1; 367 w = 1;
369 if(w) 368 if (w)
370 r = 1; 369 r = 1;
371 370
372 name = dentry_name(file->f_path.dentry, 0); 371 name = dentry_name(file->f_path.dentry, 0);
373 if(name == NULL) 372 if (name == NULL)
374 return -ENOMEM; 373 return -ENOMEM;
375 374
376 fd = open_file(name, r, w, append); 375 fd = open_file(name, r, w, append);
377 kfree(name); 376 kfree(name);
378 if(fd < 0) 377 if (fd < 0)
379 return fd; 378 return fd;
380 FILE_HOSTFS_I(file)->fd = fd; 379 FILE_HOSTFS_I(file)->fd = fd;
381 380
@@ -423,7 +422,7 @@ int hostfs_writepage(struct page *page, struct writeback_control *wbc)
423 base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; 422 base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
424 423
425 err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); 424 err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
426 if(err != count){ 425 if (err != count) {
427 ClearPageUptodate(page); 426 ClearPageUptodate(page);
428 goto out; 427 goto out;
429 } 428 }
@@ -452,7 +451,8 @@ int hostfs_readpage(struct file *file, struct page *page)
452 buffer = kmap(page); 451 buffer = kmap(page);
453 err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, 452 err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
454 PAGE_CACHE_SIZE); 453 PAGE_CACHE_SIZE);
455 if(err < 0) goto out; 454 if (err < 0)
455 goto out;
456 456
457 memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); 457 memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
458 458
@@ -466,56 +466,43 @@ int hostfs_readpage(struct file *file, struct page *page)
466 return err; 466 return err;
467} 467}
468 468
469int hostfs_prepare_write(struct file *file, struct page *page, 469int hostfs_write_begin(struct file *file, struct address_space *mapping,
470 unsigned int from, unsigned int to) 470 loff_t pos, unsigned len, unsigned flags,
471 struct page **pagep, void **fsdata)
471{ 472{
472 char *buffer; 473 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
473 long long start, tmp;
474 int err;
475 474
476 start = (long long) page->index << PAGE_CACHE_SHIFT; 475 *pagep = __grab_cache_page(mapping, index);
477 buffer = kmap(page); 476 if (!*pagep)
478 if(from != 0){ 477 return -ENOMEM;
479 tmp = start; 478 return 0;
480 err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer,
481 from);
482 if(err < 0) goto out;
483 }
484 if(to != PAGE_CACHE_SIZE){
485 start += to;
486 err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to,
487 PAGE_CACHE_SIZE - to);
488 if(err < 0) goto out;
489 }
490 err = 0;
491 out:
492 kunmap(page);
493 return err;
494} 479}
495 480
496int hostfs_commit_write(struct file *file, struct page *page, unsigned from, 481int hostfs_write_end(struct file *file, struct address_space *mapping,
497 unsigned to) 482 loff_t pos, unsigned len, unsigned copied,
483 struct page *page, void *fsdata)
498{ 484{
499 struct address_space *mapping = page->mapping;
500 struct inode *inode = mapping->host; 485 struct inode *inode = mapping->host;
501 char *buffer; 486 void *buffer;
502 long long start; 487 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
503 int err = 0; 488 int err;
504 489
505 start = (((long long) page->index) << PAGE_CACHE_SHIFT) + from;
506 buffer = kmap(page); 490 buffer = kmap(page);
507 err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, 491 err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
508 to - from); 492 kunmap(page);
509 if(err > 0) err = 0;
510 493
511 /* Actually, if !err, write_file has added to-from to start, so, despite 494 if (!PageUptodate(page) && err == PAGE_CACHE_SIZE)
512 * the appearance, we are comparing i_size against the _last_ written 495 SetPageUptodate(page);
513 * location, as we should. */
514 496
515 if(!err && (start > inode->i_size)) 497 /*
516 inode->i_size = start; 498 * If err > 0, write_file has added err to pos, so we are comparing
499 * i_size against the last byte written.
500 */
501 if (err > 0 && (pos > inode->i_size))
502 inode->i_size = pos;
503 unlock_page(page);
504 page_cache_release(page);
517 505
518 kunmap(page);
519 return err; 506 return err;
520} 507}
521 508
@@ -523,8 +510,8 @@ static const struct address_space_operations hostfs_aops = {
523 .writepage = hostfs_writepage, 510 .writepage = hostfs_writepage,
524 .readpage = hostfs_readpage, 511 .readpage = hostfs_readpage,
525 .set_page_dirty = __set_page_dirty_nobuffers, 512 .set_page_dirty = __set_page_dirty_nobuffers,
526 .prepare_write = hostfs_prepare_write, 513 .write_begin = hostfs_write_begin,
527 .commit_write = hostfs_commit_write 514 .write_end = hostfs_write_end,
528}; 515};
529 516
530static int init_inode(struct inode *inode, struct dentry *dentry) 517static int init_inode(struct inode *inode, struct dentry *dentry)
@@ -534,28 +521,28 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
534 int maj, min; 521 int maj, min;
535 dev_t rdev = 0; 522 dev_t rdev = 0;
536 523
537 if(dentry){ 524 if (dentry) {
538 name = dentry_name(dentry, 0); 525 name = dentry_name(dentry, 0);
539 if(name == NULL) 526 if (name == NULL)
540 goto out; 527 goto out;
541 type = file_type(name, &maj, &min); 528 type = file_type(name, &maj, &min);
542 /*Reencode maj and min with the kernel encoding.*/ 529 /* Reencode maj and min with the kernel encoding.*/
543 rdev = MKDEV(maj, min); 530 rdev = MKDEV(maj, min);
544 kfree(name); 531 kfree(name);
545 } 532 }
546 else type = OS_TYPE_DIR; 533 else type = OS_TYPE_DIR;
547 534
548 err = 0; 535 err = 0;
549 if(type == OS_TYPE_SYMLINK) 536 if (type == OS_TYPE_SYMLINK)
550 inode->i_op = &page_symlink_inode_operations; 537 inode->i_op = &page_symlink_inode_operations;
551 else if(type == OS_TYPE_DIR) 538 else if (type == OS_TYPE_DIR)
552 inode->i_op = &hostfs_dir_iops; 539 inode->i_op = &hostfs_dir_iops;
553 else inode->i_op = &hostfs_iops; 540 else inode->i_op = &hostfs_iops;
554 541
555 if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; 542 if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
556 else inode->i_fop = &hostfs_file_fops; 543 else inode->i_fop = &hostfs_file_fops;
557 544
558 if(type == OS_TYPE_SYMLINK) 545 if (type == OS_TYPE_SYMLINK)
559 inode->i_mapping->a_ops = &hostfs_link_aops; 546 inode->i_mapping->a_ops = &hostfs_link_aops;
560 else inode->i_mapping->a_ops = &hostfs_aops; 547 else inode->i_mapping->a_ops = &hostfs_aops;
561 548
@@ -578,7 +565,7 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
578} 565}
579 566
580int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 567int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
581 struct nameidata *nd) 568 struct nameidata *nd)
582{ 569{
583 struct inode *inode; 570 struct inode *inode;
584 char *name; 571 char *name;
@@ -586,27 +573,28 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
586 573
587 error = -ENOMEM; 574 error = -ENOMEM;
588 inode = iget(dir->i_sb, 0); 575 inode = iget(dir->i_sb, 0);
589 if(inode == NULL) goto out; 576 if (inode == NULL)
577 goto out;
590 578
591 error = init_inode(inode, dentry); 579 error = init_inode(inode, dentry);
592 if(error) 580 if (error)
593 goto out_put; 581 goto out_put;
594 582
595 error = -ENOMEM; 583 error = -ENOMEM;
596 name = dentry_name(dentry, 0); 584 name = dentry_name(dentry, 0);
597 if(name == NULL) 585 if (name == NULL)
598 goto out_put; 586 goto out_put;
599 587
600 fd = file_create(name, 588 fd = file_create(name,
601 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, 589 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
602 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, 590 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
603 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); 591 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
604 if(fd < 0) 592 if (fd < 0)
605 error = fd; 593 error = fd;
606 else error = read_name(inode, name); 594 else error = read_name(inode, name);
607 595
608 kfree(name); 596 kfree(name);
609 if(error) 597 if (error)
610 goto out_put; 598 goto out_put;
611 599
612 HOSTFS_I(inode)->fd = fd; 600 HOSTFS_I(inode)->fd = fd;
@@ -629,25 +617,25 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
629 617
630 err = -ENOMEM; 618 err = -ENOMEM;
631 inode = iget(ino->i_sb, 0); 619 inode = iget(ino->i_sb, 0);
632 if(inode == NULL) 620 if (inode == NULL)
633 goto out; 621 goto out;
634 622
635 err = init_inode(inode, dentry); 623 err = init_inode(inode, dentry);
636 if(err) 624 if (err)
637 goto out_put; 625 goto out_put;
638 626
639 err = -ENOMEM; 627 err = -ENOMEM;
640 name = dentry_name(dentry, 0); 628 name = dentry_name(dentry, 0);
641 if(name == NULL) 629 if (name == NULL)
642 goto out_put; 630 goto out_put;
643 631
644 err = read_name(inode, name); 632 err = read_name(inode, name);
645 kfree(name); 633 kfree(name);
646 if(err == -ENOENT){ 634 if (err == -ENOENT) {
647 iput(inode); 635 iput(inode);
648 inode = NULL; 636 inode = NULL;
649 } 637 }
650 else if(err) 638 else if (err)
651 goto out_put; 639 goto out_put;
652 640
653 d_add(dentry, inode); 641 d_add(dentry, inode);
@@ -666,7 +654,7 @@ static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
666 int len; 654 int len;
667 655
668 file = inode_name(ino, dentry->d_name.len + 1); 656 file = inode_name(ino, dentry->d_name.len + 1);
669 if(file == NULL) 657 if (file == NULL)
670 return NULL; 658 return NULL;
671 strcat(file, "/"); 659 strcat(file, "/");
672 len = strlen(file); 660 len = strlen(file);
@@ -680,10 +668,10 @@ int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
680 char *from_name, *to_name; 668 char *from_name, *to_name;
681 int err; 669 int err;
682 670
683 if((from_name = inode_dentry_name(ino, from)) == NULL) 671 if ((from_name = inode_dentry_name(ino, from)) == NULL)
684 return -ENOMEM; 672 return -ENOMEM;
685 to_name = dentry_name(to, 0); 673 to_name = dentry_name(to, 0);
686 if(to_name == NULL){ 674 if (to_name == NULL) {
687 kfree(from_name); 675 kfree(from_name);
688 return -ENOMEM; 676 return -ENOMEM;
689 } 677 }
@@ -698,9 +686,9 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
698 char *file; 686 char *file;
699 int err; 687 int err;
700 688
701 if((file = inode_dentry_name(ino, dentry)) == NULL) 689 if ((file = inode_dentry_name(ino, dentry)) == NULL)
702 return -ENOMEM; 690 return -ENOMEM;
703 if(append) 691 if (append)
704 return -EPERM; 692 return -EPERM;
705 693
706 err = unlink_file(file); 694 err = unlink_file(file);
@@ -713,7 +701,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
713 char *file; 701 char *file;
714 int err; 702 int err;
715 703
716 if((file = inode_dentry_name(ino, dentry)) == NULL) 704 if ((file = inode_dentry_name(ino, dentry)) == NULL)
717 return -ENOMEM; 705 return -ENOMEM;
718 err = make_symlink(file, to); 706 err = make_symlink(file, to);
719 kfree(file); 707 kfree(file);
@@ -725,7 +713,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
725 char *file; 713 char *file;
726 int err; 714 int err;
727 715
728 if((file = inode_dentry_name(ino, dentry)) == NULL) 716 if ((file = inode_dentry_name(ino, dentry)) == NULL)
729 return -ENOMEM; 717 return -ENOMEM;
730 err = do_mkdir(file, mode); 718 err = do_mkdir(file, mode);
731 kfree(file); 719 kfree(file);
@@ -737,7 +725,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
737 char *file; 725 char *file;
738 int err; 726 int err;
739 727
740 if((file = inode_dentry_name(ino, dentry)) == NULL) 728 if ((file = inode_dentry_name(ino, dentry)) == NULL)
741 return -ENOMEM; 729 return -ENOMEM;
742 err = do_rmdir(file); 730 err = do_rmdir(file);
743 kfree(file); 731 kfree(file);
@@ -751,26 +739,26 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
751 int err = -ENOMEM; 739 int err = -ENOMEM;
752 740
753 inode = iget(dir->i_sb, 0); 741 inode = iget(dir->i_sb, 0);
754 if(inode == NULL) 742 if (inode == NULL)
755 goto out; 743 goto out;
756 744
757 err = init_inode(inode, dentry); 745 err = init_inode(inode, dentry);
758 if(err) 746 if (err)
759 goto out_put; 747 goto out_put;
760 748
761 err = -ENOMEM; 749 err = -ENOMEM;
762 name = dentry_name(dentry, 0); 750 name = dentry_name(dentry, 0);
763 if(name == NULL) 751 if (name == NULL)
764 goto out_put; 752 goto out_put;
765 753
766 init_special_inode(inode, mode, dev); 754 init_special_inode(inode, mode, dev);
767 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); 755 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
768 if(err) 756 if (err)
769 goto out_free; 757 goto out_free;
770 758
771 err = read_name(inode, name); 759 err = read_name(inode, name);
772 kfree(name); 760 kfree(name);
773 if(err) 761 if (err)
774 goto out_put; 762 goto out_put;
775 763
776 d_instantiate(dentry, inode); 764 d_instantiate(dentry, inode);
@@ -790,9 +778,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
790 char *from_name, *to_name; 778 char *from_name, *to_name;
791 int err; 779 int err;
792 780
793 if((from_name = inode_dentry_name(from_ino, from)) == NULL) 781 if ((from_name = inode_dentry_name(from_ino, from)) == NULL)
794 return -ENOMEM; 782 return -ENOMEM;
795 if((to_name = inode_dentry_name(to_ino, to)) == NULL){ 783 if ((to_name = inode_dentry_name(to_ino, to)) == NULL) {
796 kfree(from_name); 784 kfree(from_name);
797 return -ENOMEM; 785 return -ENOMEM;
798 } 786 }
@@ -815,12 +803,12 @@ int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
815 return -ENOMEM; 803 return -ENOMEM;
816 804
817 if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) || 805 if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) ||
818 S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode)) 806 S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode))
819 err = 0; 807 err = 0;
820 else 808 else
821 err = access_file(name, r, w, x); 809 err = access_file(name, r, w, x);
822 kfree(name); 810 kfree(name);
823 if(!err) 811 if (!err)
824 err = generic_permission(ino, desired, NULL); 812 err = generic_permission(ino, desired, NULL);
825 return err; 813 return err;
826} 814}
@@ -837,62 +825,55 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
837 if (err) 825 if (err)
838 return err; 826 return err;
839 827
840 if(append) 828 if (append)
841 attr->ia_valid &= ~ATTR_SIZE; 829 attr->ia_valid &= ~ATTR_SIZE;
842 830
843 attrs.ia_valid = 0; 831 attrs.ia_valid = 0;
844 if(attr->ia_valid & ATTR_MODE){ 832 if (attr->ia_valid & ATTR_MODE) {
845 attrs.ia_valid |= HOSTFS_ATTR_MODE; 833 attrs.ia_valid |= HOSTFS_ATTR_MODE;
846 attrs.ia_mode = attr->ia_mode; 834 attrs.ia_mode = attr->ia_mode;
847 } 835 }
848 if(attr->ia_valid & ATTR_UID){ 836 if (attr->ia_valid & ATTR_UID) {
849 attrs.ia_valid |= HOSTFS_ATTR_UID; 837 attrs.ia_valid |= HOSTFS_ATTR_UID;
850 attrs.ia_uid = attr->ia_uid; 838 attrs.ia_uid = attr->ia_uid;
851 } 839 }
852 if(attr->ia_valid & ATTR_GID){ 840 if (attr->ia_valid & ATTR_GID) {
853 attrs.ia_valid |= HOSTFS_ATTR_GID; 841 attrs.ia_valid |= HOSTFS_ATTR_GID;
854 attrs.ia_gid = attr->ia_gid; 842 attrs.ia_gid = attr->ia_gid;
855 } 843 }
856 if(attr->ia_valid & ATTR_SIZE){ 844 if (attr->ia_valid & ATTR_SIZE) {
857 attrs.ia_valid |= HOSTFS_ATTR_SIZE; 845 attrs.ia_valid |= HOSTFS_ATTR_SIZE;
858 attrs.ia_size = attr->ia_size; 846 attrs.ia_size = attr->ia_size;
859 } 847 }
860 if(attr->ia_valid & ATTR_ATIME){ 848 if (attr->ia_valid & ATTR_ATIME) {
861 attrs.ia_valid |= HOSTFS_ATTR_ATIME; 849 attrs.ia_valid |= HOSTFS_ATTR_ATIME;
862 attrs.ia_atime = attr->ia_atime; 850 attrs.ia_atime = attr->ia_atime;
863 } 851 }
864 if(attr->ia_valid & ATTR_MTIME){ 852 if (attr->ia_valid & ATTR_MTIME) {
865 attrs.ia_valid |= HOSTFS_ATTR_MTIME; 853 attrs.ia_valid |= HOSTFS_ATTR_MTIME;
866 attrs.ia_mtime = attr->ia_mtime; 854 attrs.ia_mtime = attr->ia_mtime;
867 } 855 }
868 if(attr->ia_valid & ATTR_CTIME){ 856 if (attr->ia_valid & ATTR_CTIME) {
869 attrs.ia_valid |= HOSTFS_ATTR_CTIME; 857 attrs.ia_valid |= HOSTFS_ATTR_CTIME;
870 attrs.ia_ctime = attr->ia_ctime; 858 attrs.ia_ctime = attr->ia_ctime;
871 } 859 }
872 if(attr->ia_valid & ATTR_ATIME_SET){ 860 if (attr->ia_valid & ATTR_ATIME_SET) {
873 attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; 861 attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
874 } 862 }
875 if(attr->ia_valid & ATTR_MTIME_SET){ 863 if (attr->ia_valid & ATTR_MTIME_SET) {
876 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; 864 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
877 } 865 }
878 name = dentry_name(dentry, 0); 866 name = dentry_name(dentry, 0);
879 if(name == NULL) 867 if (name == NULL)
880 return -ENOMEM; 868 return -ENOMEM;
881 err = set_attr(name, &attrs, fd); 869 err = set_attr(name, &attrs, fd);
882 kfree(name); 870 kfree(name);
883 if(err) 871 if (err)
884 return err; 872 return err;
885 873
886 return inode_setattr(dentry->d_inode, attr); 874 return inode_setattr(dentry->d_inode, attr);
887} 875}
888 876
889int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
890 struct kstat *stat)
891{
892 generic_fillattr(dentry->d_inode, stat);
893 return 0;
894}
895
896static const struct inode_operations hostfs_iops = { 877static const struct inode_operations hostfs_iops = {
897 .create = hostfs_create, 878 .create = hostfs_create,
898 .link = hostfs_link, 879 .link = hostfs_link,
@@ -904,7 +885,6 @@ static const struct inode_operations hostfs_iops = {
904 .rename = hostfs_rename, 885 .rename = hostfs_rename,
905 .permission = hostfs_permission, 886 .permission = hostfs_permission,
906 .setattr = hostfs_setattr, 887 .setattr = hostfs_setattr,
907 .getattr = hostfs_getattr,
908}; 888};
909 889
910static const struct inode_operations hostfs_dir_iops = { 890static const struct inode_operations hostfs_dir_iops = {
@@ -919,7 +899,6 @@ static const struct inode_operations hostfs_dir_iops = {
919 .rename = hostfs_rename, 899 .rename = hostfs_rename,
920 .permission = hostfs_permission, 900 .permission = hostfs_permission,
921 .setattr = hostfs_setattr, 901 .setattr = hostfs_setattr,
922 .getattr = hostfs_getattr,
923}; 902};
924 903
925int hostfs_link_readpage(struct file *file, struct page *page) 904int hostfs_link_readpage(struct file *file, struct page *page)
@@ -929,13 +908,13 @@ int hostfs_link_readpage(struct file *file, struct page *page)
929 908
930 buffer = kmap(page); 909 buffer = kmap(page);
931 name = inode_name(page->mapping->host, 0); 910 name = inode_name(page->mapping->host, 0);
932 if(name == NULL) 911 if (name == NULL)
933 return -ENOMEM; 912 return -ENOMEM;
934 err = do_readlink(name, buffer, PAGE_CACHE_SIZE); 913 err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
935 kfree(name); 914 kfree(name);
936 if(err == PAGE_CACHE_SIZE) 915 if (err == PAGE_CACHE_SIZE)
937 err = -E2BIG; 916 err = -E2BIG;
938 else if(err > 0){ 917 else if (err > 0) {
939 flush_dcache_page(page); 918 flush_dcache_page(page);
940 SetPageUptodate(page); 919 SetPageUptodate(page);
941 if (PageError(page)) ClearPageError(page); 920 if (PageError(page)) ClearPageError(page);
@@ -968,31 +947,33 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
968 err = -ENOMEM; 947 err = -ENOMEM;
969 host_root_path = kmalloc(strlen(root_ino) + 1 948 host_root_path = kmalloc(strlen(root_ino) + 1
970 + strlen(req_root) + 1, GFP_KERNEL); 949 + strlen(req_root) + 1, GFP_KERNEL);
971 if(host_root_path == NULL) 950 if (host_root_path == NULL)
972 goto out; 951 goto out;
973 952
974 sprintf(host_root_path, "%s/%s", root_ino, req_root); 953 sprintf(host_root_path, "%s/%s", root_ino, req_root);
975 954
976 root_inode = iget(sb, 0); 955 root_inode = iget(sb, 0);
977 if(root_inode == NULL) 956 if (root_inode == NULL)
978 goto out_free; 957 goto out_free;
979 958
980 err = init_inode(root_inode, NULL); 959 err = init_inode(root_inode, NULL);
981 if(err) 960 if (err)
982 goto out_put; 961 goto out_put;
983 962
984 HOSTFS_I(root_inode)->host_filename = host_root_path; 963 HOSTFS_I(root_inode)->host_filename = host_root_path;
985 /* Avoid that in the error path, iput(root_inode) frees again 964 /*
986 * host_root_path through hostfs_destroy_inode! */ 965 * Avoid that in the error path, iput(root_inode) frees again
966 * host_root_path through hostfs_destroy_inode!
967 */
987 host_root_path = NULL; 968 host_root_path = NULL;
988 969
989 err = -ENOMEM; 970 err = -ENOMEM;
990 sb->s_root = d_alloc_root(root_inode); 971 sb->s_root = d_alloc_root(root_inode);
991 if(sb->s_root == NULL) 972 if (sb->s_root == NULL)
992 goto out_put; 973 goto out_put;
993 974
994 err = read_inode(root_inode); 975 err = read_inode(root_inode);
995 if(err){ 976 if (err) {
996 /* No iput in this case because the dput does that for us */ 977 /* No iput in this case because the dput does that for us */
997 dput(sb->s_root); 978 dput(sb->s_root);
998 sb->s_root = NULL; 979 sb->s_root = NULL;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 5625e2481dd3..35c1a9f33f47 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -3,19 +3,21 @@
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include <unistd.h>
7#include <stdio.h> 6#include <stdio.h>
8#include <fcntl.h> 7#include <stddef.h>
8#include <unistd.h>
9#include <dirent.h> 9#include <dirent.h>
10#include <errno.h> 10#include <errno.h>
11#include <utime.h> 11#include <fcntl.h>
12#include <string.h> 12#include <string.h>
13#include <sys/stat.h> 13#include <sys/stat.h>
14#include <sys/time.h> 14#include <sys/time.h>
15#include <sys/types.h>
15#include <sys/vfs.h> 16#include <sys/vfs.h>
16#include "hostfs.h" 17#include "hostfs.h"
17#include "kern_util.h" 18#include "os.h"
18#include "user.h" 19#include "user.h"
20#include <utime.h>
19 21
20int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, 22int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
21 int *nlink_out, int *uid_out, int *gid_out, 23 int *nlink_out, int *uid_out, int *gid_out,
@@ -25,33 +27,41 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
25{ 27{
26 struct stat64 buf; 28 struct stat64 buf;
27 29
28 if(fd >= 0) { 30 if (fd >= 0) {
29 if (fstat64(fd, &buf) < 0) 31 if (fstat64(fd, &buf) < 0)
30 return -errno; 32 return -errno;
31 } else if(lstat64(path, &buf) < 0) { 33 } else if (lstat64(path, &buf) < 0) {
32 return -errno; 34 return -errno;
33 } 35 }
34 36
35 if(inode_out != NULL) *inode_out = buf.st_ino; 37 if (inode_out != NULL)
36 if(mode_out != NULL) *mode_out = buf.st_mode; 38 *inode_out = buf.st_ino;
37 if(nlink_out != NULL) *nlink_out = buf.st_nlink; 39 if (mode_out != NULL)
38 if(uid_out != NULL) *uid_out = buf.st_uid; 40 *mode_out = buf.st_mode;
39 if(gid_out != NULL) *gid_out = buf.st_gid; 41 if (nlink_out != NULL)
40 if(size_out != NULL) *size_out = buf.st_size; 42 *nlink_out = buf.st_nlink;
41 if(atime_out != NULL) { 43 if (uid_out != NULL)
44 *uid_out = buf.st_uid;
45 if (gid_out != NULL)
46 *gid_out = buf.st_gid;
47 if (size_out != NULL)
48 *size_out = buf.st_size;
49 if (atime_out != NULL) {
42 atime_out->tv_sec = buf.st_atime; 50 atime_out->tv_sec = buf.st_atime;
43 atime_out->tv_nsec = 0; 51 atime_out->tv_nsec = 0;
44 } 52 }
45 if(mtime_out != NULL) { 53 if (mtime_out != NULL) {
46 mtime_out->tv_sec = buf.st_mtime; 54 mtime_out->tv_sec = buf.st_mtime;
47 mtime_out->tv_nsec = 0; 55 mtime_out->tv_nsec = 0;
48 } 56 }
49 if(ctime_out != NULL) { 57 if (ctime_out != NULL) {
50 ctime_out->tv_sec = buf.st_ctime; 58 ctime_out->tv_sec = buf.st_ctime;
51 ctime_out->tv_nsec = 0; 59 ctime_out->tv_nsec = 0;
52 } 60 }
53 if(blksize_out != NULL) *blksize_out = buf.st_blksize; 61 if (blksize_out != NULL)
54 if(blocks_out != NULL) *blocks_out = buf.st_blocks; 62 *blksize_out = buf.st_blksize;
63 if (blocks_out != NULL)
64 *blocks_out = buf.st_blocks;
55 return 0; 65 return 0;
56} 66}
57 67
@@ -59,21 +69,29 @@ int file_type(const char *path, int *maj, int *min)
59{ 69{
60 struct stat64 buf; 70 struct stat64 buf;
61 71
62 if(lstat64(path, &buf) < 0) 72 if (lstat64(path, &buf) < 0)
63 return -errno; 73 return -errno;
64 /*We cannot pass rdev as is because glibc and the kernel disagree 74 /*
65 *about its definition.*/ 75 * We cannot pass rdev as is because glibc and the kernel disagree
66 if(maj != NULL) 76 * about its definition.
77 */
78 if (maj != NULL)
67 *maj = major(buf.st_rdev); 79 *maj = major(buf.st_rdev);
68 if(min != NULL) 80 if (min != NULL)
69 *min = minor(buf.st_rdev); 81 *min = minor(buf.st_rdev);
70 82
71 if(S_ISDIR(buf.st_mode)) return OS_TYPE_DIR; 83 if (S_ISDIR(buf.st_mode))
72 else if(S_ISLNK(buf.st_mode)) return OS_TYPE_SYMLINK; 84 return OS_TYPE_DIR;
73 else if(S_ISCHR(buf.st_mode)) return OS_TYPE_CHARDEV; 85 else if (S_ISLNK(buf.st_mode))
74 else if(S_ISBLK(buf.st_mode)) return OS_TYPE_BLOCKDEV; 86 return OS_TYPE_SYMLINK;
75 else if(S_ISFIFO(buf.st_mode))return OS_TYPE_FIFO; 87 else if (S_ISCHR(buf.st_mode))
76 else if(S_ISSOCK(buf.st_mode))return OS_TYPE_SOCK; 88 return OS_TYPE_CHARDEV;
89 else if (S_ISBLK(buf.st_mode))
90 return OS_TYPE_BLOCKDEV;
91 else if (S_ISFIFO(buf.st_mode))
92 return OS_TYPE_FIFO;
93 else if (S_ISSOCK(buf.st_mode))
94 return OS_TYPE_SOCK;
77 else return OS_TYPE_FILE; 95 else return OS_TYPE_FILE;
78} 96}
79 97
@@ -81,10 +99,13 @@ int access_file(char *path, int r, int w, int x)
81{ 99{
82 int mode = 0; 100 int mode = 0;
83 101
84 if(r) mode = R_OK; 102 if (r)
85 if(w) mode |= W_OK; 103 mode = R_OK;
86 if(x) mode |= X_OK; 104 if (w)
87 if(access(path, mode) != 0) 105 mode |= W_OK;
106 if (x)
107 mode |= X_OK;
108 if (access(path, mode) != 0)
88 return -errno; 109 return -errno;
89 else return 0; 110 else return 0;
90} 111}
@@ -93,18 +114,18 @@ int open_file(char *path, int r, int w, int append)
93{ 114{
94 int mode = 0, fd; 115 int mode = 0, fd;
95 116
96 if(r && !w) 117 if (r && !w)
97 mode = O_RDONLY; 118 mode = O_RDONLY;
98 else if(!r && w) 119 else if (!r && w)
99 mode = O_WRONLY; 120 mode = O_WRONLY;
100 else if(r && w) 121 else if (r && w)
101 mode = O_RDWR; 122 mode = O_RDWR;
102 else panic("Impossible mode in open_file"); 123 else panic("Impossible mode in open_file");
103 124
104 if(append) 125 if (append)
105 mode |= O_APPEND; 126 mode |= O_APPEND;
106 fd = open64(path, mode); 127 fd = open64(path, mode);
107 if(fd < 0) 128 if (fd < 0)
108 return -errno; 129 return -errno;
109 else return fd; 130 else return fd;
110} 131}
@@ -115,7 +136,7 @@ void *open_dir(char *path, int *err_out)
115 136
116 dir = opendir(path); 137 dir = opendir(path);
117 *err_out = errno; 138 *err_out = errno;
118 if(dir == NULL) 139 if (dir == NULL)
119 return NULL; 140 return NULL;
120 return dir; 141 return dir;
121} 142}
@@ -128,7 +149,7 @@ char *read_dir(void *stream, unsigned long long *pos,
128 149
129 seekdir(dir, *pos); 150 seekdir(dir, *pos);
130 ent = readdir(dir); 151 ent = readdir(dir);
131 if(ent == NULL) 152 if (ent == NULL)
132 return NULL; 153 return NULL;
133 *len_out = strlen(ent->d_name); 154 *len_out = strlen(ent->d_name);
134 *ino_out = ent->d_ino; 155 *ino_out = ent->d_ino;
@@ -141,7 +162,7 @@ int read_file(int fd, unsigned long long *offset, char *buf, int len)
141 int n; 162 int n;
142 163
143 n = pread64(fd, buf, len, *offset); 164 n = pread64(fd, buf, len, *offset);
144 if(n < 0) 165 if (n < 0)
145 return -errno; 166 return -errno;
146 *offset += n; 167 *offset += n;
147 return n; 168 return n;
@@ -152,7 +173,7 @@ int write_file(int fd, unsigned long long *offset, const char *buf, int len)
152 int n; 173 int n;
153 174
154 n = pwrite64(fd, buf, len, *offset); 175 n = pwrite64(fd, buf, len, *offset);
155 if(n < 0) 176 if (n < 0)
156 return -errno; 177 return -errno;
157 *offset += n; 178 *offset += n;
158 return n; 179 return n;
@@ -163,7 +184,7 @@ int lseek_file(int fd, long long offset, int whence)
163 int ret; 184 int ret;
164 185
165 ret = lseek64(fd, offset, whence); 186 ret = lseek64(fd, offset, whence);
166 if(ret < 0) 187 if (ret < 0)
167 return -errno; 188 return -errno;
168 return 0; 189 return 0;
169} 190}
@@ -207,7 +228,7 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
207 mode |= ow ? S_IWOTH : 0; 228 mode |= ow ? S_IWOTH : 0;
208 mode |= ox ? S_IXOTH : 0; 229 mode |= ox ? S_IXOTH : 0;
209 fd = open64(name, O_CREAT | O_RDWR, mode); 230 fd = open64(name, O_CREAT | O_RDWR, mode);
210 if(fd < 0) 231 if (fd < 0)
211 return -errno; 232 return -errno;
212 return fd; 233 return fd;
213} 234}
@@ -230,7 +251,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
230 if (fd >= 0) { 251 if (fd >= 0) {
231 if (fchown(fd, attrs->ia_uid, -1)) 252 if (fchown(fd, attrs->ia_uid, -1))
232 return -errno; 253 return -errno;
233 } else if(chown(file, attrs->ia_uid, -1)) { 254 } else if (chown(file, attrs->ia_uid, -1)) {
234 return -errno; 255 return -errno;
235 } 256 }
236 } 257 }
@@ -251,9 +272,11 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
251 } 272 }
252 } 273 }
253 274
254 /* Update accessed and/or modified time, in two parts: first set 275 /*
276 * Update accessed and/or modified time, in two parts: first set
255 * times according to the changes to perform, and then call futimes() 277 * times according to the changes to perform, and then call futimes()
256 * or utimes() to apply them. */ 278 * or utimes() to apply them.
279 */
257 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); 280 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
258 if (attrs->ia_valid & ma) { 281 if (attrs->ia_valid & ma) {
259 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 282 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -283,12 +306,12 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
283 } 306 }
284 } 307 }
285 308
286 if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; 309 /* Note: ctime is not handled */
287 if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ 310 if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
288 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 311 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
289 &attrs->ia_atime, &attrs->ia_mtime, NULL, 312 &attrs->ia_atime, &attrs->ia_mtime, NULL,
290 NULL, NULL, fd); 313 NULL, NULL, fd);
291 if(err != 0) 314 if (err != 0)
292 return err; 315 return err;
293 } 316 }
294 return 0; 317 return 0;
@@ -299,7 +322,7 @@ int make_symlink(const char *from, const char *to)
299 int err; 322 int err;
300 323
301 err = symlink(to, from); 324 err = symlink(to, from);
302 if(err) 325 if (err)
303 return -errno; 326 return -errno;
304 return 0; 327 return 0;
305} 328}
@@ -309,7 +332,7 @@ int unlink_file(const char *file)
309 int err; 332 int err;
310 333
311 err = unlink(file); 334 err = unlink(file);
312 if(err) 335 if (err)
313 return -errno; 336 return -errno;
314 return 0; 337 return 0;
315} 338}
@@ -319,7 +342,7 @@ int do_mkdir(const char *file, int mode)
319 int err; 342 int err;
320 343
321 err = mkdir(file, mode); 344 err = mkdir(file, mode);
322 if(err) 345 if (err)
323 return -errno; 346 return -errno;
324 return 0; 347 return 0;
325} 348}
@@ -329,7 +352,7 @@ int do_rmdir(const char *file)
329 int err; 352 int err;
330 353
331 err = rmdir(file); 354 err = rmdir(file);
332 if(err) 355 if (err)
333 return -errno; 356 return -errno;
334 return 0; 357 return 0;
335} 358}
@@ -339,7 +362,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
339 int err; 362 int err;
340 363
341 err = mknod(file, mode, makedev(major, minor)); 364 err = mknod(file, mode, makedev(major, minor));
342 if(err) 365 if (err)
343 return -errno; 366 return -errno;
344 return 0; 367 return 0;
345} 368}
@@ -349,7 +372,7 @@ int link_file(const char *to, const char *from)
349 int err; 372 int err;
350 373
351 err = link(to, from); 374 err = link(to, from);
352 if(err) 375 if (err)
353 return -errno; 376 return -errno;
354 return 0; 377 return 0;
355} 378}
@@ -359,9 +382,9 @@ int do_readlink(char *file, char *buf, int size)
359 int n; 382 int n;
360 383
361 n = readlink(file, buf, size); 384 n = readlink(file, buf, size);
362 if(n < 0) 385 if (n < 0)
363 return -errno; 386 return -errno;
364 if(n < size) 387 if (n < size)
365 buf[n] = '\0'; 388 buf[n] = '\0';
366 return n; 389 return n;
367} 390}
@@ -371,7 +394,7 @@ int rename_file(char *from, char *to)
371 int err; 394 int err;
372 395
373 err = rename(from, to); 396 err = rename(from, to);
374 if(err < 0) 397 if (err < 0)
375 return -errno; 398 return -errno;
376 return 0; 399 return 0;
377} 400}
@@ -386,7 +409,7 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
386 int err; 409 int err;
387 410
388 err = statfs64(root, &buf); 411 err = statfs64(root, &buf);
389 if(err < 0) 412 if (err < 0)
390 return -errno; 413 return -errno;
391 414
392 *bsize_out = buf.f_bsize; 415 *bsize_out = buf.f_bsize;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 5b53e5c5d8df..be8be5040e07 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -86,25 +86,33 @@ static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
86{ 86{
87 return block_write_full_page(page,hpfs_get_block, wbc); 87 return block_write_full_page(page,hpfs_get_block, wbc);
88} 88}
89
89static int hpfs_readpage(struct file *file, struct page *page) 90static int hpfs_readpage(struct file *file, struct page *page)
90{ 91{
91 return block_read_full_page(page,hpfs_get_block); 92 return block_read_full_page(page,hpfs_get_block);
92} 93}
93static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 94
95static int hpfs_write_begin(struct file *file, struct address_space *mapping,
96 loff_t pos, unsigned len, unsigned flags,
97 struct page **pagep, void **fsdata)
94{ 98{
95 return cont_prepare_write(page,from,to,hpfs_get_block, 99 *pagep = NULL;
96 &hpfs_i(page->mapping->host)->mmu_private); 100 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
101 hpfs_get_block,
102 &hpfs_i(mapping->host)->mmu_private);
97} 103}
104
98static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) 105static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
99{ 106{
100 return generic_block_bmap(mapping,block,hpfs_get_block); 107 return generic_block_bmap(mapping,block,hpfs_get_block);
101} 108}
109
102const struct address_space_operations hpfs_aops = { 110const struct address_space_operations hpfs_aops = {
103 .readpage = hpfs_readpage, 111 .readpage = hpfs_readpage,
104 .writepage = hpfs_writepage, 112 .writepage = hpfs_writepage,
105 .sync_page = block_sync_page, 113 .sync_page = block_sync_page,
106 .prepare_write = hpfs_prepare_write, 114 .write_begin = hpfs_write_begin,
107 .commit_write = generic_commit_write, 115 .write_end = generic_write_end,
108 .bmap = _hpfs_bmap 116 .bmap = _hpfs_bmap
109}; 117};
110 118
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 950c2fbb815b..04598e12c489 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -179,6 +179,130 @@ full_search:
179} 179}
180#endif 180#endif
181 181
182static int
183hugetlbfs_read_actor(struct page *page, unsigned long offset,
184 char __user *buf, unsigned long count,
185 unsigned long size)
186{
187 char *kaddr;
188 unsigned long left, copied = 0;
189 int i, chunksize;
190
191 if (size > count)
192 size = count;
193
194 /* Find which 4k chunk and offset with in that chunk */
195 i = offset >> PAGE_CACHE_SHIFT;
196 offset = offset & ~PAGE_CACHE_MASK;
197
198 while (size) {
199 chunksize = PAGE_CACHE_SIZE;
200 if (offset)
201 chunksize -= offset;
202 if (chunksize > size)
203 chunksize = size;
204 kaddr = kmap(&page[i]);
205 left = __copy_to_user(buf, kaddr + offset, chunksize);
206 kunmap(&page[i]);
207 if (left) {
208 copied += (chunksize - left);
209 break;
210 }
211 offset = 0;
212 size -= chunksize;
213 buf += chunksize;
214 copied += chunksize;
215 i++;
216 }
217 return copied ? copied : -EFAULT;
218}
219
220/*
221 * Support for read() - Find the page attached to f_mapping and copy out the
222 * data. Its *very* similar to do_generic_mapping_read(), we can't use that
223 * since it has PAGE_CACHE_SIZE assumptions.
224 */
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos)
227{
228 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT;
231 unsigned long offset = *ppos & ~HPAGE_MASK;
232 unsigned long end_index;
233 loff_t isize;
234 ssize_t retval = 0;
235
236 mutex_lock(&inode->i_mutex);
237
238 /* validate length */
239 if (len == 0)
240 goto out;
241
242 isize = i_size_read(inode);
243 if (!isize)
244 goto out;
245
246 end_index = (isize - 1) >> HPAGE_SHIFT;
247 for (;;) {
248 struct page *page;
249 int nr, ret;
250
251 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE;
253 if (index >= end_index) {
254 if (index > end_index)
255 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1;
257 if (nr <= offset) {
258 goto out;
259 }
260 }
261 nr = nr - offset;
262
263 /* Find the page */
264 page = find_get_page(mapping, index);
265 if (unlikely(page == NULL)) {
266 /*
267 * We have a HOLE, zero out the user-buffer for the
268 * length of the hole or request.
269 */
270 ret = len < nr ? len : nr;
271 if (clear_user(buf, ret))
272 ret = -EFAULT;
273 } else {
274 /*
275 * We have the page, copy it to user space buffer.
276 */
277 ret = hugetlbfs_read_actor(page, offset, buf, len, nr);
278 }
279 if (ret < 0) {
280 if (retval == 0)
281 retval = ret;
282 if (page)
283 page_cache_release(page);
284 goto out;
285 }
286
287 offset += ret;
288 retval += ret;
289 len -= ret;
290 index += offset >> HPAGE_SHIFT;
291 offset &= ~HPAGE_MASK;
292
293 if (page)
294 page_cache_release(page);
295
296 /* short read or no more work */
297 if ((ret != nr) || (len == 0))
298 break;
299 }
300out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
302 mutex_unlock(&inode->i_mutex);
303 return retval;
304}
305
182/* 306/*
183 * Read a page. Again trivial. If it didn't already exist 307 * Read a page. Again trivial. If it didn't already exist
184 * in the page cache, it is zero-filled. 308 * in the page cache, it is zero-filled.
@@ -189,15 +313,19 @@ static int hugetlbfs_readpage(struct file *file, struct page * page)
189 return -EINVAL; 313 return -EINVAL;
190} 314}
191 315
192static int hugetlbfs_prepare_write(struct file *file, 316static int hugetlbfs_write_begin(struct file *file,
193 struct page *page, unsigned offset, unsigned to) 317 struct address_space *mapping,
318 loff_t pos, unsigned len, unsigned flags,
319 struct page **pagep, void **fsdata)
194{ 320{
195 return -EINVAL; 321 return -EINVAL;
196} 322}
197 323
198static int hugetlbfs_commit_write(struct file *file, 324static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
199 struct page *page, unsigned offset, unsigned to) 325 loff_t pos, unsigned len, unsigned copied,
326 struct page *page, void *fsdata)
200{ 327{
328 BUG();
201 return -EINVAL; 329 return -EINVAL;
202} 330}
203 331
@@ -318,21 +446,15 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
318 } 446 }
319} 447}
320 448
321/*
322 * Expanding truncates are not allowed.
323 */
324static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 449static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
325{ 450{
326 pgoff_t pgoff; 451 pgoff_t pgoff;
327 struct address_space *mapping = inode->i_mapping; 452 struct address_space *mapping = inode->i_mapping;
328 453
329 if (offset > inode->i_size)
330 return -EINVAL;
331
332 BUG_ON(offset & ~HPAGE_MASK); 454 BUG_ON(offset & ~HPAGE_MASK);
333 pgoff = offset >> PAGE_SHIFT; 455 pgoff = offset >> PAGE_SHIFT;
334 456
335 inode->i_size = offset; 457 i_size_write(inode, offset);
336 spin_lock(&mapping->i_mmap_lock); 458 spin_lock(&mapping->i_mmap_lock);
337 if (!prio_tree_empty(&mapping->i_mmap)) 459 if (!prio_tree_empty(&mapping->i_mmap))
338 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 460 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
@@ -569,8 +691,8 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
569 691
570static const struct address_space_operations hugetlbfs_aops = { 692static const struct address_space_operations hugetlbfs_aops = {
571 .readpage = hugetlbfs_readpage, 693 .readpage = hugetlbfs_readpage,
572 .prepare_write = hugetlbfs_prepare_write, 694 .write_begin = hugetlbfs_write_begin,
573 .commit_write = hugetlbfs_commit_write, 695 .write_end = hugetlbfs_write_end,
574 .set_page_dirty = hugetlbfs_set_page_dirty, 696 .set_page_dirty = hugetlbfs_set_page_dirty,
575}; 697};
576 698
@@ -583,6 +705,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
583} 705}
584 706
585const struct file_operations hugetlbfs_file_operations = { 707const struct file_operations hugetlbfs_file_operations = {
708 .read = hugetlbfs_read,
586 .mmap = hugetlbfs_file_mmap, 709 .mmap = hugetlbfs_file_mmap,
587 .fsync = simple_sync_file, 710 .fsync = simple_sync_file,
588 .get_unmapped_area = hugetlb_get_unmapped_area, 711 .get_unmapped_area = hugetlb_get_unmapped_area,
diff --git a/fs/inode.c b/fs/inode.c
index f97de0aeb3b6..21dab18b2f18 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -568,16 +568,16 @@ EXPORT_SYMBOL(new_inode);
568void unlock_new_inode(struct inode *inode) 568void unlock_new_inode(struct inode *inode)
569{ 569{
570#ifdef CONFIG_DEBUG_LOCK_ALLOC 570#ifdef CONFIG_DEBUG_LOCK_ALLOC
571 struct file_system_type *type = inode->i_sb->s_type; 571 if (inode->i_mode & S_IFDIR) {
572 /* 572 struct file_system_type *type = inode->i_sb->s_type;
573 * ensure nobody is actually holding i_mutex 573
574 */ 574 /*
575 mutex_destroy(&inode->i_mutex); 575 * ensure nobody is actually holding i_mutex
576 mutex_init(&inode->i_mutex); 576 */
577 if (inode->i_mode & S_IFDIR) 577 mutex_destroy(&inode->i_mutex);
578 mutex_init(&inode->i_mutex);
578 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); 579 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
579 else 580 }
580 lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
581#endif 581#endif
582 /* 582 /*
583 * This is special! We do not need the spinlock 583 * This is special! We do not need the spinlock
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c10b1b8..a6be78c05dce 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1710,7 +1710,7 @@ static int journal_init_journal_head_cache(void)
1710 journal_head_cache = kmem_cache_create("journal_head", 1710 journal_head_cache = kmem_cache_create("journal_head",
1711 sizeof(struct journal_head), 1711 sizeof(struct journal_head),
1712 0, /* offset */ 1712 0, /* offset */
1713 0, /* flags */ 1713 SLAB_TEMPORARY, /* flags */
1714 NULL); /* ctor */ 1714 NULL); /* ctor */
1715 retval = 0; 1715 retval = 0;
1716 if (journal_head_cache == 0) { 1716 if (journal_head_cache == 0) {
@@ -2006,7 +2006,7 @@ static int __init journal_init_handle_cache(void)
2006 jbd_handle_cache = kmem_cache_create("journal_handle", 2006 jbd_handle_cache = kmem_cache_create("journal_handle",
2007 sizeof(handle_t), 2007 sizeof(handle_t),
2008 0, /* offset */ 2008 0, /* offset */
2009 0, /* flags */ 2009 SLAB_TEMPORARY, /* flags */
2010 NULL); /* ctor */ 2010 NULL); /* ctor */
2011 if (jbd_handle_cache == NULL) { 2011 if (jbd_handle_cache == NULL) {
2012 printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2012 printk(KERN_EMERG "JBD: failed to create handle cache\n");
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 62e13c8db132..ad2eacf570c6 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -170,13 +170,15 @@ int __init journal_init_revoke_caches(void)
170{ 170{
171 revoke_record_cache = kmem_cache_create("revoke_record", 171 revoke_record_cache = kmem_cache_create("revoke_record",
172 sizeof(struct jbd_revoke_record_s), 172 sizeof(struct jbd_revoke_record_s),
173 0, SLAB_HWCACHE_ALIGN, NULL); 173 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL);
174 if (revoke_record_cache == 0) 176 if (revoke_record_cache == 0)
175 return -ENOMEM; 177 return -ENOMEM;
176 178
177 revoke_table_cache = kmem_cache_create("revoke_table", 179 revoke_table_cache = kmem_cache_create("revoke_table",
178 sizeof(struct jbd_revoke_table_s), 180 sizeof(struct jbd_revoke_table_s),
179 0, 0, NULL); 181 0, SLAB_TEMPORARY, NULL);
180 if (revoke_table_cache == 0) { 182 if (revoke_table_cache == 0) {
181 kmem_cache_destroy(revoke_record_cache); 183 kmem_cache_destroy(revoke_record_cache);
182 revoke_record_cache = NULL; 184 revoke_record_cache = NULL;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c2530197be0c..023a17539dd4 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -19,10 +19,12 @@
19#include <linux/jffs2.h> 19#include <linux/jffs2.h>
20#include "nodelist.h" 20#include "nodelist.h"
21 21
22static int jffs2_commit_write (struct file *filp, struct page *pg, 22static int jffs2_write_end(struct file *filp, struct address_space *mapping,
23 unsigned start, unsigned end); 23 loff_t pos, unsigned len, unsigned copied,
24static int jffs2_prepare_write (struct file *filp, struct page *pg, 24 struct page *pg, void *fsdata);
25 unsigned start, unsigned end); 25static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
26 loff_t pos, unsigned len, unsigned flags,
27 struct page **pagep, void **fsdata);
26static int jffs2_readpage (struct file *filp, struct page *pg); 28static int jffs2_readpage (struct file *filp, struct page *pg);
27 29
28int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) 30int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
@@ -65,8 +67,8 @@ const struct inode_operations jffs2_file_inode_operations =
65const struct address_space_operations jffs2_file_address_operations = 67const struct address_space_operations jffs2_file_address_operations =
66{ 68{
67 .readpage = jffs2_readpage, 69 .readpage = jffs2_readpage,
68 .prepare_write =jffs2_prepare_write, 70 .write_begin = jffs2_write_begin,
69 .commit_write = jffs2_commit_write 71 .write_end = jffs2_write_end,
70}; 72};
71 73
72static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) 74static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
@@ -119,15 +121,23 @@ static int jffs2_readpage (struct file *filp, struct page *pg)
119 return ret; 121 return ret;
120} 122}
121 123
122static int jffs2_prepare_write (struct file *filp, struct page *pg, 124static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
123 unsigned start, unsigned end) 125 loff_t pos, unsigned len, unsigned flags,
126 struct page **pagep, void **fsdata)
124{ 127{
125 struct inode *inode = pg->mapping->host; 128 struct page *pg;
129 struct inode *inode = mapping->host;
126 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 130 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
127 uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT; 131 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
132 uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1);
128 int ret = 0; 133 int ret = 0;
129 134
130 D1(printk(KERN_DEBUG "jffs2_prepare_write()\n")); 135 pg = __grab_cache_page(mapping, index);
136 if (!pg)
137 return -ENOMEM;
138 *pagep = pg;
139
140 D1(printk(KERN_DEBUG "jffs2_write_begin()\n"));
131 141
132 if (pageofs > inode->i_size) { 142 if (pageofs > inode->i_size) {
133 /* Make new hole frag from old EOF to new page */ 143 /* Make new hole frag from old EOF to new page */
@@ -142,7 +152,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
142 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, 152 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
143 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 153 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
144 if (ret) 154 if (ret)
145 return ret; 155 goto out_page;
146 156
147 down(&f->sem); 157 down(&f->sem);
148 memset(&ri, 0, sizeof(ri)); 158 memset(&ri, 0, sizeof(ri));
@@ -172,7 +182,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
172 ret = PTR_ERR(fn); 182 ret = PTR_ERR(fn);
173 jffs2_complete_reservation(c); 183 jffs2_complete_reservation(c);
174 up(&f->sem); 184 up(&f->sem);
175 return ret; 185 goto out_page;
176 } 186 }
177 ret = jffs2_add_full_dnode_to_inode(c, f, fn); 187 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
178 if (f->metadata) { 188 if (f->metadata) {
@@ -181,65 +191,79 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
181 f->metadata = NULL; 191 f->metadata = NULL;
182 } 192 }
183 if (ret) { 193 if (ret) {
184 D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret)); 194 D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret));
185 jffs2_mark_node_obsolete(c, fn->raw); 195 jffs2_mark_node_obsolete(c, fn->raw);
186 jffs2_free_full_dnode(fn); 196 jffs2_free_full_dnode(fn);
187 jffs2_complete_reservation(c); 197 jffs2_complete_reservation(c);
188 up(&f->sem); 198 up(&f->sem);
189 return ret; 199 goto out_page;
190 } 200 }
191 jffs2_complete_reservation(c); 201 jffs2_complete_reservation(c);
192 inode->i_size = pageofs; 202 inode->i_size = pageofs;
193 up(&f->sem); 203 up(&f->sem);
194 } 204 }
195 205
196 /* Read in the page if it wasn't already present, unless it's a whole page */ 206 /*
197 if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) { 207 * Read in the page if it wasn't already present. Cannot optimize away
208 * the whole page write case until jffs2_write_end can handle the
209 * case of a short-copy.
210 */
211 if (!PageUptodate(pg)) {
198 down(&f->sem); 212 down(&f->sem);
199 ret = jffs2_do_readpage_nolock(inode, pg); 213 ret = jffs2_do_readpage_nolock(inode, pg);
200 up(&f->sem); 214 up(&f->sem);
215 if (ret)
216 goto out_page;
201 } 217 }
202 D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags)); 218 D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags));
219 return ret;
220
221out_page:
222 unlock_page(pg);
223 page_cache_release(pg);
203 return ret; 224 return ret;
204} 225}
205 226
206static int jffs2_commit_write (struct file *filp, struct page *pg, 227static int jffs2_write_end(struct file *filp, struct address_space *mapping,
207 unsigned start, unsigned end) 228 loff_t pos, unsigned len, unsigned copied,
229 struct page *pg, void *fsdata)
208{ 230{
209 /* Actually commit the write from the page cache page we're looking at. 231 /* Actually commit the write from the page cache page we're looking at.
210 * For now, we write the full page out each time. It sucks, but it's simple 232 * For now, we write the full page out each time. It sucks, but it's simple
211 */ 233 */
212 struct inode *inode = pg->mapping->host; 234 struct inode *inode = mapping->host;
213 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 235 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
214 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 236 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
215 struct jffs2_raw_inode *ri; 237 struct jffs2_raw_inode *ri;
238 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
239 unsigned end = start + copied;
216 unsigned aligned_start = start & ~3; 240 unsigned aligned_start = start & ~3;
217 int ret = 0; 241 int ret = 0;
218 uint32_t writtenlen = 0; 242 uint32_t writtenlen = 0;
219 243
220 D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", 244 D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
221 inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); 245 inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));
222 246
247 /* We need to avoid deadlock with page_cache_read() in
248 jffs2_garbage_collect_pass(). So the page must be
249 up to date to prevent page_cache_read() from trying
250 to re-lock it. */
251 BUG_ON(!PageUptodate(pg));
252
223 if (end == PAGE_CACHE_SIZE) { 253 if (end == PAGE_CACHE_SIZE) {
224 if (!start) { 254 /* When writing out the end of a page, write out the
225 /* We need to avoid deadlock with page_cache_read() in 255 _whole_ page. This helps to reduce the number of
226 jffs2_garbage_collect_pass(). So we have to mark the 256 nodes in files which have many short writes, like
227 page up to date, to prevent page_cache_read() from 257 syslog files. */
228 trying to re-lock it. */ 258 start = aligned_start = 0;
229 SetPageUptodate(pg);
230 } else {
231 /* When writing out the end of a page, write out the
232 _whole_ page. This helps to reduce the number of
233 nodes in files which have many short writes, like
234 syslog files. */
235 start = aligned_start = 0;
236 }
237 } 259 }
238 260
239 ri = jffs2_alloc_raw_inode(); 261 ri = jffs2_alloc_raw_inode();
240 262
241 if (!ri) { 263 if (!ri) {
242 D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n")); 264 D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n"));
265 unlock_page(pg);
266 page_cache_release(pg);
243 return -ENOMEM; 267 return -ENOMEM;
244 } 268 }
245 269
@@ -287,11 +311,14 @@ static int jffs2_commit_write (struct file *filp, struct page *pg,
287 /* generic_file_write has written more to the page cache than we've 311 /* generic_file_write has written more to the page cache than we've
288 actually written to the medium. Mark the page !Uptodate so that 312 actually written to the medium. Mark the page !Uptodate so that
289 it gets reread */ 313 it gets reread */
290 D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n")); 314 D1(printk(KERN_DEBUG "jffs2_write_end(): Not all bytes written. Marking page !uptodate\n"));
291 SetPageError(pg); 315 SetPageError(pg);
292 ClearPageUptodate(pg); 316 ClearPageUptodate(pg);
293 } 317 }
294 318
295 D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret)); 319 D1(printk(KERN_DEBUG "jffs2_write_end() returning %d\n",
296 return start+writtenlen==end?0:ret; 320 writtenlen > 0 ? writtenlen : ret));
321 unlock_page(pg);
322 page_cache_release(pg);
323 return writtenlen > 0 ? writtenlen : ret;
297} 324}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 3467dde27e5a..4672013802e1 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -255,7 +255,7 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
255 255
256static int jfs_writepage(struct page *page, struct writeback_control *wbc) 256static int jfs_writepage(struct page *page, struct writeback_control *wbc)
257{ 257{
258 return nobh_writepage(page, jfs_get_block, wbc); 258 return block_write_full_page(page, jfs_get_block, wbc);
259} 259}
260 260
261static int jfs_writepages(struct address_space *mapping, 261static int jfs_writepages(struct address_space *mapping,
@@ -275,10 +275,12 @@ static int jfs_readpages(struct file *file, struct address_space *mapping,
275 return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); 275 return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
276} 276}
277 277
278static int jfs_prepare_write(struct file *file, 278static int jfs_write_begin(struct file *file, struct address_space *mapping,
279 struct page *page, unsigned from, unsigned to) 279 loff_t pos, unsigned len, unsigned flags,
280 struct page **pagep, void **fsdata)
280{ 281{
281 return nobh_prepare_write(page, from, to, jfs_get_block); 282 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
283 jfs_get_block);
282} 284}
283 285
284static sector_t jfs_bmap(struct address_space *mapping, sector_t block) 286static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
@@ -302,8 +304,8 @@ const struct address_space_operations jfs_aops = {
302 .writepage = jfs_writepage, 304 .writepage = jfs_writepage,
303 .writepages = jfs_writepages, 305 .writepages = jfs_writepages,
304 .sync_page = block_sync_page, 306 .sync_page = block_sync_page,
305 .prepare_write = jfs_prepare_write, 307 .write_begin = jfs_write_begin,
306 .commit_write = nobh_commit_write, 308 .write_end = nobh_write_end,
307 .bmap = jfs_bmap, 309 .bmap = jfs_bmap,
308 .direct_IO = jfs_direct_IO, 310 .direct_IO = jfs_direct_IO,
309}; 311};
@@ -356,7 +358,7 @@ void jfs_truncate(struct inode *ip)
356{ 358{
357 jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); 359 jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
358 360
359 nobh_truncate_page(ip->i_mapping, ip->i_size); 361 nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
360 362
361 IWRITE_LOCK(ip, RDWRLOCK_NORMAL); 363 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
362 jfs_truncate_nolock(ip, ip->i_size); 364 jfs_truncate_nolock(ip, ip->i_size);
diff --git a/fs/libfs.c b/fs/libfs.c
index 5294de1f40c4..f2b32d3a9093 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page,
351 return 0; 351 return 0;
352} 352}
353 353
354int simple_write_begin(struct file *file, struct address_space *mapping,
355 loff_t pos, unsigned len, unsigned flags,
356 struct page **pagep, void **fsdata)
357{
358 struct page *page;
359 pgoff_t index;
360 unsigned from;
361
362 index = pos >> PAGE_CACHE_SHIFT;
363 from = pos & (PAGE_CACHE_SIZE - 1);
364
365 page = __grab_cache_page(mapping, index);
366 if (!page)
367 return -ENOMEM;
368
369 *pagep = page;
370
371 return simple_prepare_write(file, page, from, from+len);
372}
373
354int simple_commit_write(struct file *file, struct page *page, 374int simple_commit_write(struct file *file, struct page *page,
355 unsigned from, unsigned to) 375 unsigned from, unsigned to)
356{ 376{
@@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page,
369 return 0; 389 return 0;
370} 390}
371 391
392int simple_write_end(struct file *file, struct address_space *mapping,
393 loff_t pos, unsigned len, unsigned copied,
394 struct page *page, void *fsdata)
395{
396 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
397
398 /* zero the stale part of the page if we did a short copy */
399 if (copied < len) {
400 void *kaddr = kmap_atomic(page, KM_USER0);
401 memset(kaddr + from + copied, 0, len - copied);
402 flush_dcache_page(page);
403 kunmap_atomic(kaddr, KM_USER0);
404 }
405
406 simple_commit_write(file, page, from, from+copied);
407
408 unlock_page(page);
409 page_cache_release(page);
410
411 return copied;
412}
413
372/* 414/*
373 * the inodes created here are not hashed. If you use iunique to generate 415 * the inodes created here are not hashed. If you use iunique to generate
374 * unique inode values later for this filesystem, then you must take care 416 * unique inode values later for this filesystem, then you must take care
@@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open);
642EXPORT_SYMBOL(dcache_readdir); 684EXPORT_SYMBOL(dcache_readdir);
643EXPORT_SYMBOL(generic_read_dir); 685EXPORT_SYMBOL(generic_read_dir);
644EXPORT_SYMBOL(get_sb_pseudo); 686EXPORT_SYMBOL(get_sb_pseudo);
687EXPORT_SYMBOL(simple_write_begin);
688EXPORT_SYMBOL(simple_write_end);
645EXPORT_SYMBOL(simple_commit_write); 689EXPORT_SYMBOL(simple_commit_write);
646EXPORT_SYMBOL(simple_dir_inode_operations); 690EXPORT_SYMBOL(simple_dir_inode_operations);
647EXPORT_SYMBOL(simple_dir_operations); 691EXPORT_SYMBOL(simple_dir_operations);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index e207cbe70951..f70433816a38 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -9,8 +9,10 @@
9 */ 9 */
10 10
11#include "minix.h" 11#include "minix.h"
12#include <linux/buffer_head.h>
12#include <linux/highmem.h> 13#include <linux/highmem.h>
13#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
15#include <linux/swap.h>
14 16
15typedef struct minix_dir_entry minix_dirent; 17typedef struct minix_dir_entry minix_dirent;
16typedef struct minix3_dir_entry minix3_dirent; 18typedef struct minix3_dir_entry minix3_dirent;
@@ -48,11 +50,17 @@ static inline unsigned long dir_pages(struct inode *inode)
48 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 50 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
49} 51}
50 52
51static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) 53static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
52{ 54{
53 struct inode *dir = (struct inode *)page->mapping->host; 55 struct address_space *mapping = page->mapping;
56 struct inode *dir = mapping->host;
54 int err = 0; 57 int err = 0;
55 page->mapping->a_ops->commit_write(NULL, page, from, to); 58 block_write_end(NULL, mapping, pos, len, len, page, NULL);
59
60 if (pos+len > dir->i_size) {
61 i_size_write(dir, pos+len);
62 mark_inode_dirty(dir);
63 }
56 if (IS_DIRSYNC(dir)) 64 if (IS_DIRSYNC(dir))
57 err = write_one_page(page, 1); 65 err = write_one_page(page, 1);
58 else 66 else
@@ -220,7 +228,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
220 char *kaddr, *p; 228 char *kaddr, *p;
221 minix_dirent *de; 229 minix_dirent *de;
222 minix3_dirent *de3; 230 minix3_dirent *de3;
223 unsigned from, to; 231 loff_t pos;
224 int err; 232 int err;
225 char *namx = NULL; 233 char *namx = NULL;
226 __u32 inumber; 234 __u32 inumber;
@@ -272,9 +280,9 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
272 return -EINVAL; 280 return -EINVAL;
273 281
274got_it: 282got_it:
275 from = p - (char*)page_address(page); 283 pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page);
276 to = from + sbi->s_dirsize; 284 err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
277 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 285 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
278 if (err) 286 if (err)
279 goto out_unlock; 287 goto out_unlock;
280 memcpy (namx, name, namelen); 288 memcpy (namx, name, namelen);
@@ -285,7 +293,7 @@ got_it:
285 memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2); 293 memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2);
286 de->inode = inode->i_ino; 294 de->inode = inode->i_ino;
287 } 295 }
288 err = dir_commit_chunk(page, from, to); 296 err = dir_commit_chunk(page, pos, sbi->s_dirsize);
289 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 297 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
290 mark_inode_dirty(dir); 298 mark_inode_dirty(dir);
291out_put: 299out_put:
@@ -302,15 +310,16 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
302 struct address_space *mapping = page->mapping; 310 struct address_space *mapping = page->mapping;
303 struct inode *inode = (struct inode*)mapping->host; 311 struct inode *inode = (struct inode*)mapping->host;
304 char *kaddr = page_address(page); 312 char *kaddr = page_address(page);
305 unsigned from = (char*)de - kaddr; 313 loff_t pos = page_offset(page) + (char*)de - kaddr;
306 unsigned to = from + minix_sb(inode->i_sb)->s_dirsize; 314 unsigned len = minix_sb(inode->i_sb)->s_dirsize;
307 int err; 315 int err;
308 316
309 lock_page(page); 317 lock_page(page);
310 err = mapping->a_ops->prepare_write(NULL, page, from, to); 318 err = __minix_write_begin(NULL, mapping, pos, len,
319 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
311 if (err == 0) { 320 if (err == 0) {
312 de->inode = 0; 321 de->inode = 0;
313 err = dir_commit_chunk(page, from, to); 322 err = dir_commit_chunk(page, pos, len);
314 } else { 323 } else {
315 unlock_page(page); 324 unlock_page(page);
316 } 325 }
@@ -330,7 +339,8 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
330 339
331 if (!page) 340 if (!page)
332 return -ENOMEM; 341 return -ENOMEM;
333 err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * sbi->s_dirsize); 342 err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize,
343 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
334 if (err) { 344 if (err) {
335 unlock_page(page); 345 unlock_page(page);
336 goto fail; 346 goto fail;
@@ -421,17 +431,20 @@ not_empty:
421void minix_set_link(struct minix_dir_entry *de, struct page *page, 431void minix_set_link(struct minix_dir_entry *de, struct page *page,
422 struct inode *inode) 432 struct inode *inode)
423{ 433{
424 struct inode *dir = (struct inode*)page->mapping->host; 434 struct address_space *mapping = page->mapping;
435 struct inode *dir = mapping->host;
425 struct minix_sb_info *sbi = minix_sb(dir->i_sb); 436 struct minix_sb_info *sbi = minix_sb(dir->i_sb);
426 unsigned from = (char *)de-(char*)page_address(page); 437 loff_t pos = page_offset(page) +
427 unsigned to = from + sbi->s_dirsize; 438 (char *)de-(char*)page_address(page);
428 int err; 439 int err;
429 440
430 lock_page(page); 441 lock_page(page);
431 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 442
443 err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
444 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
432 if (err == 0) { 445 if (err == 0) {
433 de->inode = inode->i_ino; 446 de->inode = inode->i_ino;
434 err = dir_commit_chunk(page, from, to); 447 err = dir_commit_chunk(page, pos, sbi->s_dirsize);
435 } else { 448 } else {
436 unlock_page(page); 449 unlock_page(page);
437 } 450 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 43668d7d668f..f4f3343b1800 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -346,24 +346,39 @@ static int minix_writepage(struct page *page, struct writeback_control *wbc)
346{ 346{
347 return block_write_full_page(page, minix_get_block, wbc); 347 return block_write_full_page(page, minix_get_block, wbc);
348} 348}
349
349static int minix_readpage(struct file *file, struct page *page) 350static int minix_readpage(struct file *file, struct page *page)
350{ 351{
351 return block_read_full_page(page,minix_get_block); 352 return block_read_full_page(page,minix_get_block);
352} 353}
353static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 354
355int __minix_write_begin(struct file *file, struct address_space *mapping,
356 loff_t pos, unsigned len, unsigned flags,
357 struct page **pagep, void **fsdata)
354{ 358{
355 return block_prepare_write(page,from,to,minix_get_block); 359 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
360 minix_get_block);
356} 361}
362
363static int minix_write_begin(struct file *file, struct address_space *mapping,
364 loff_t pos, unsigned len, unsigned flags,
365 struct page **pagep, void **fsdata)
366{
367 *pagep = NULL;
368 return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
369}
370
357static sector_t minix_bmap(struct address_space *mapping, sector_t block) 371static sector_t minix_bmap(struct address_space *mapping, sector_t block)
358{ 372{
359 return generic_block_bmap(mapping,block,minix_get_block); 373 return generic_block_bmap(mapping,block,minix_get_block);
360} 374}
375
361static const struct address_space_operations minix_aops = { 376static const struct address_space_operations minix_aops = {
362 .readpage = minix_readpage, 377 .readpage = minix_readpage,
363 .writepage = minix_writepage, 378 .writepage = minix_writepage,
364 .sync_page = block_sync_page, 379 .sync_page = block_sync_page,
365 .prepare_write = minix_prepare_write, 380 .write_begin = minix_write_begin,
366 .commit_write = generic_commit_write, 381 .write_end = generic_write_end,
367 .bmap = minix_bmap 382 .bmap = minix_bmap
368}; 383};
369 384
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 73ef84f8fb0b..ac5d3a75cb0d 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -54,6 +54,9 @@ extern int minix_new_block(struct inode * inode);
54extern void minix_free_block(struct inode *inode, unsigned long block); 54extern void minix_free_block(struct inode *inode, unsigned long block);
55extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 55extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
56extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 56extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
57extern int __minix_write_begin(struct file *file, struct address_space *mapping,
58 loff_t pos, unsigned len, unsigned flags,
59 struct page **pagep, void **fsdata);
57 60
58extern void V1_minix_truncate(struct inode *); 61extern void V1_minix_truncate(struct inode *);
59extern void V2_minix_truncate(struct inode *); 62extern void V2_minix_truncate(struct inode *);
diff --git a/fs/mpage.c b/fs/mpage.c
index b1c3e5890508..d54f8f897224 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,31 +379,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
379 struct bio *bio = NULL; 379 struct bio *bio = NULL;
380 unsigned page_idx; 380 unsigned page_idx;
381 sector_t last_block_in_bio = 0; 381 sector_t last_block_in_bio = 0;
382 struct pagevec lru_pvec;
383 struct buffer_head map_bh; 382 struct buffer_head map_bh;
384 unsigned long first_logical_block = 0; 383 unsigned long first_logical_block = 0;
385 384
386 clear_buffer_mapped(&map_bh); 385 clear_buffer_mapped(&map_bh);
387 pagevec_init(&lru_pvec, 0);
388 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 386 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
389 struct page *page = list_entry(pages->prev, struct page, lru); 387 struct page *page = list_entry(pages->prev, struct page, lru);
390 388
391 prefetchw(&page->flags); 389 prefetchw(&page->flags);
392 list_del(&page->lru); 390 list_del(&page->lru);
393 if (!add_to_page_cache(page, mapping, 391 if (!add_to_page_cache_lru(page, mapping,
394 page->index, GFP_KERNEL)) { 392 page->index, GFP_KERNEL)) {
395 bio = do_mpage_readpage(bio, page, 393 bio = do_mpage_readpage(bio, page,
396 nr_pages - page_idx, 394 nr_pages - page_idx,
397 &last_block_in_bio, &map_bh, 395 &last_block_in_bio, &map_bh,
398 &first_logical_block, 396 &first_logical_block,
399 get_block); 397 get_block);
400 if (!pagevec_add(&lru_pvec, page))
401 __pagevec_lru_add(&lru_pvec);
402 } else {
403 page_cache_release(page);
404 } 398 }
399 page_cache_release(page);
405 } 400 }
406 pagevec_lru_add(&lru_pvec);
407 BUG_ON(!list_empty(pages)); 401 BUG_ON(!list_empty(pages));
408 if (bio) 402 if (bio)
409 mpage_bio_submit(READ, bio); 403 mpage_bio_submit(READ, bio);
diff --git a/fs/namei.c b/fs/namei.c
index a83160acd748..b40b8084eefc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
2729{ 2729{
2730 struct address_space *mapping = inode->i_mapping; 2730 struct address_space *mapping = inode->i_mapping;
2731 struct page *page; 2731 struct page *page;
2732 void *fsdata;
2732 int err; 2733 int err;
2733 char *kaddr; 2734 char *kaddr;
2734 2735
2735retry: 2736retry:
2736 err = -ENOMEM; 2737 err = pagecache_write_begin(NULL, mapping, 0, len-1,
2737 page = find_or_create_page(mapping, 0, gfp_mask); 2738 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
2738 if (!page)
2739 goto fail;
2740 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2741 if (err == AOP_TRUNCATED_PAGE) {
2742 page_cache_release(page);
2743 goto retry;
2744 }
2745 if (err) 2739 if (err)
2746 goto fail_map; 2740 goto fail;
2741
2747 kaddr = kmap_atomic(page, KM_USER0); 2742 kaddr = kmap_atomic(page, KM_USER0);
2748 memcpy(kaddr, symname, len-1); 2743 memcpy(kaddr, symname, len-1);
2749 kunmap_atomic(kaddr, KM_USER0); 2744 kunmap_atomic(kaddr, KM_USER0);
2750 err = mapping->a_ops->commit_write(NULL, page, 0, len-1); 2745
2751 if (err == AOP_TRUNCATED_PAGE) { 2746 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
2752 page_cache_release(page); 2747 page, fsdata);
2753 goto retry;
2754 }
2755 if (err)
2756 goto fail_map;
2757 /*
2758 * Notice that we are _not_ going to block here - end of page is
2759 * unmapped, so this will only try to map the rest of page, see
2760 * that it is unmapped (typically even will not look into inode -
2761 * ->i_size will be enough for everything) and zero it out.
2762 * OTOH it's obviously correct and should make the page up-to-date.
2763 */
2764 if (!PageUptodate(page)) {
2765 err = mapping->a_ops->readpage(NULL, page);
2766 if (err != AOP_TRUNCATED_PAGE)
2767 wait_on_page_locked(page);
2768 } else {
2769 unlock_page(page);
2770 }
2771 page_cache_release(page);
2772 if (err < 0) 2748 if (err < 0)
2773 goto fail; 2749 goto fail;
2750 if (err < len-1)
2751 goto retry;
2752
2774 mark_inode_dirty(inode); 2753 mark_inode_dirty(inode);
2775 return 0; 2754 return 0;
2776fail_map:
2777 unlock_page(page);
2778 page_cache_release(page);
2779fail: 2755fail:
2780 return err; 2756 return err;
2781} 2757}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 08c7c7387fce..d29f90d00aa2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -306,27 +306,50 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
306} 306}
307 307
308/* 308/*
309 * This does the "real" work of the write. The generic routine has 309 * This does the "real" work of the write. We must allocate and lock the
310 * allocated the page, locked it, done all the page alignment stuff 310 * page to be sent back to the generic routine, which then copies the
311 * calculations etc. Now we should just copy the data from user 311 * data from user space.
312 * space and write it back to the real medium..
313 * 312 *
314 * If the writer ends up delaying the write, the writer needs to 313 * If the writer ends up delaying the write, the writer needs to
315 * increment the page use counts until he is done with the page. 314 * increment the page use counts until he is done with the page.
316 */ 315 */
317static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) 316static int nfs_write_begin(struct file *file, struct address_space *mapping,
317 loff_t pos, unsigned len, unsigned flags,
318 struct page **pagep, void **fsdata)
318{ 319{
319 return nfs_flush_incompatible(file, page); 320 int ret;
321 pgoff_t index;
322 struct page *page;
323 index = pos >> PAGE_CACHE_SHIFT;
324
325 page = __grab_cache_page(mapping, index);
326 if (!page)
327 return -ENOMEM;
328 *pagep = page;
329
330 ret = nfs_flush_incompatible(file, page);
331 if (ret) {
332 unlock_page(page);
333 page_cache_release(page);
334 }
335 return ret;
320} 336}
321 337
322static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) 338static int nfs_write_end(struct file *file, struct address_space *mapping,
339 loff_t pos, unsigned len, unsigned copied,
340 struct page *page, void *fsdata)
323{ 341{
324 long status; 342 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
343 int status;
325 344
326 lock_kernel(); 345 lock_kernel();
327 status = nfs_updatepage(file, page, offset, to-offset); 346 status = nfs_updatepage(file, page, offset, copied);
328 unlock_kernel(); 347 unlock_kernel();
329 return status; 348
349 unlock_page(page);
350 page_cache_release(page);
351
352 return status < 0 ? status : copied;
330} 353}
331 354
332static void nfs_invalidate_page(struct page *page, unsigned long offset) 355static void nfs_invalidate_page(struct page *page, unsigned long offset)
@@ -354,8 +377,8 @@ const struct address_space_operations nfs_file_aops = {
354 .set_page_dirty = __set_page_dirty_nobuffers, 377 .set_page_dirty = __set_page_dirty_nobuffers,
355 .writepage = nfs_writepage, 378 .writepage = nfs_writepage,
356 .writepages = nfs_writepages, 379 .writepages = nfs_writepages,
357 .prepare_write = nfs_prepare_write, 380 .write_begin = nfs_write_begin,
358 .commit_write = nfs_commit_write, 381 .write_end = nfs_write_end,
359 .invalidatepage = nfs_invalidate_page, 382 .invalidatepage = nfs_invalidate_page,
360 .releasepage = nfs_release_page, 383 .releasepage = nfs_release_page,
361#ifdef CONFIG_NFS_DIRECTIO 384#ifdef CONFIG_NFS_DIRECTIO
@@ -369,18 +392,35 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
369 struct file *filp = vma->vm_file; 392 struct file *filp = vma->vm_file;
370 unsigned pagelen; 393 unsigned pagelen;
371 int ret = -EINVAL; 394 int ret = -EINVAL;
395 void *fsdata;
396 struct address_space *mapping;
397 loff_t offset;
372 398
373 lock_page(page); 399 lock_page(page);
374 if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) 400 mapping = page->mapping;
375 goto out_unlock; 401 if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) {
402 unlock_page(page);
403 return -EINVAL;
404 }
376 pagelen = nfs_page_length(page); 405 pagelen = nfs_page_length(page);
377 if (pagelen == 0) 406 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
378 goto out_unlock;
379 ret = nfs_prepare_write(filp, page, 0, pagelen);
380 if (!ret)
381 ret = nfs_commit_write(filp, page, 0, pagelen);
382out_unlock:
383 unlock_page(page); 407 unlock_page(page);
408
409 /*
410 * we can use mapping after releasing the page lock, because:
411 * we hold mmap_sem on the fault path, which should pin the vma
412 * which should pin the file, which pins the dentry which should
413 * hold a reference on inode.
414 */
415
416 if (pagelen) {
417 struct page *page2 = NULL;
418 ret = nfs_write_begin(filp, mapping, offset, pagelen,
419 0, &page2, &fsdata);
420 if (!ret)
421 ret = nfs_write_end(filp, mapping, offset, pagelen,
422 pagelen, page2, fsdata);
423 }
384 return ret; 424 return ret;
385} 425}
386 426
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cba899a3494e..04b266729802 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -861,9 +861,9 @@ exp_get_fsid_key(svc_client *clp, int fsid)
861 return exp_find_key(clp, FSID_NUM, fsidv, NULL); 861 return exp_find_key(clp, FSID_NUM, fsidv, NULL);
862} 862}
863 863
864svc_export * 864static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
865exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, 865 struct dentry *dentry,
866 struct cache_req *reqp) 866 struct cache_req *reqp)
867{ 867{
868 struct svc_export *exp, key; 868 struct svc_export *exp, key;
869 int err; 869 int err;
@@ -887,9 +887,9 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
887/* 887/*
888 * Find the export entry for a given dentry. 888 * Find the export entry for a given dentry.
889 */ 889 */
890struct svc_export * 890static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
891exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, 891 struct dentry *dentry,
892 struct cache_req *reqp) 892 struct cache_req *reqp)
893{ 893{
894 svc_export *exp; 894 svc_export *exp;
895 895
@@ -1214,9 +1214,8 @@ out:
1214 return err; 1214 return err;
1215} 1215}
1216 1216
1217struct svc_export * 1217static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
1218exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, 1218 u32 *fsidv, struct cache_req *reqp)
1219 struct cache_req *reqp)
1220{ 1219{
1221 struct svc_export *exp; 1220 struct svc_export *exp;
1222 struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); 1221 struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 34d10452c56d..c69c1b300155 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1724,9 +1724,9 @@ out:
1724 return ret; 1724 return ret;
1725} 1725}
1726 1726
1727int ocfs2_write_begin(struct file *file, struct address_space *mapping, 1727static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
1728 loff_t pos, unsigned len, unsigned flags, 1728 loff_t pos, unsigned len, unsigned flags,
1729 struct page **pagep, void **fsdata) 1729 struct page **pagep, void **fsdata)
1730{ 1730{
1731 int ret; 1731 int ret;
1732 struct buffer_head *di_bh = NULL; 1732 struct buffer_head *di_bh = NULL;
@@ -1877,9 +1877,9 @@ out_write_size:
1877 return copied; 1877 return copied;
1878} 1878}
1879 1879
1880int ocfs2_write_end(struct file *file, struct address_space *mapping, 1880static int ocfs2_write_end(struct file *file, struct address_space *mapping,
1881 loff_t pos, unsigned len, unsigned copied, 1881 loff_t pos, unsigned len, unsigned copied,
1882 struct page *page, void *fsdata) 1882 struct page *page, void *fsdata)
1883{ 1883{
1884 int ret; 1884 int ret;
1885 struct inode *inode = mapping->host; 1885 struct inode *inode = mapping->host;
@@ -1896,6 +1896,8 @@ int ocfs2_write_end(struct file *file, struct address_space *mapping,
1896const struct address_space_operations ocfs2_aops = { 1896const struct address_space_operations ocfs2_aops = {
1897 .readpage = ocfs2_readpage, 1897 .readpage = ocfs2_readpage,
1898 .writepage = ocfs2_writepage, 1898 .writepage = ocfs2_writepage,
1899 .write_begin = ocfs2_write_begin,
1900 .write_end = ocfs2_write_end,
1899 .bmap = ocfs2_bmap, 1901 .bmap = ocfs2_bmap,
1900 .sync_page = block_sync_page, 1902 .sync_page = block_sync_page,
1901 .direct_IO = ocfs2_direct_IO, 1903 .direct_IO = ocfs2_direct_IO,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 113560877dbb..503e49232e11 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -44,14 +44,6 @@ int walk_page_buffers( handle_t *handle,
44 int (*fn)( handle_t *handle, 44 int (*fn)( handle_t *handle,
45 struct buffer_head *bh)); 45 struct buffer_head *bh));
46 46
47int ocfs2_write_begin(struct file *file, struct address_space *mapping,
48 loff_t pos, unsigned len, unsigned flags,
49 struct page **pagep, void **fsdata);
50
51int ocfs2_write_end(struct file *file, struct address_space *mapping,
52 loff_t pos, unsigned len, unsigned copied,
53 struct page *page, void *fsdata);
54
55int ocfs2_write_end_nolock(struct address_space *mapping, 47int ocfs2_write_end_nolock(struct address_space *mapping,
56 loff_t pos, unsigned len, unsigned copied, 48 loff_t pos, unsigned len, unsigned copied,
57 struct page *page, void *fsdata); 49 struct page *page, void *fsdata);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a62b14eb4065..f92fe91ff260 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1881,143 +1881,13 @@ out:
1881 return ret; 1881 return ret;
1882} 1882}
1883 1883
1884static inline void
1885ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
1886{
1887 const struct iovec *iov = *iovp;
1888 size_t base = *basep;
1889
1890 do {
1891 int copy = min(bytes, iov->iov_len - base);
1892
1893 bytes -= copy;
1894 base += copy;
1895 if (iov->iov_len == base) {
1896 iov++;
1897 base = 0;
1898 }
1899 } while (bytes);
1900 *iovp = iov;
1901 *basep = base;
1902}
1903
1904static struct page * ocfs2_get_write_source(char **ret_src_buf,
1905 const struct iovec *cur_iov,
1906 size_t iov_offset)
1907{
1908 int ret;
1909 char *buf = cur_iov->iov_base + iov_offset;
1910 struct page *src_page = NULL;
1911 unsigned long off;
1912
1913 off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
1914
1915 if (!segment_eq(get_fs(), KERNEL_DS)) {
1916 /*
1917 * Pull in the user page. We want to do this outside
1918 * of the meta data locks in order to preserve locking
1919 * order in case of page fault.
1920 */
1921 ret = get_user_pages(current, current->mm,
1922 (unsigned long)buf & PAGE_CACHE_MASK, 1,
1923 0, 0, &src_page, NULL);
1924 if (ret == 1)
1925 *ret_src_buf = kmap(src_page) + off;
1926 else
1927 src_page = ERR_PTR(-EFAULT);
1928 } else {
1929 *ret_src_buf = buf;
1930 }
1931
1932 return src_page;
1933}
1934
1935static void ocfs2_put_write_source(struct page *page)
1936{
1937 if (page) {
1938 kunmap(page);
1939 page_cache_release(page);
1940 }
1941}
1942
1943static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
1944 const struct iovec *iov,
1945 unsigned long nr_segs,
1946 size_t count,
1947 ssize_t o_direct_written)
1948{
1949 int ret = 0;
1950 ssize_t copied, total = 0;
1951 size_t iov_offset = 0, bytes;
1952 loff_t pos;
1953 const struct iovec *cur_iov = iov;
1954 struct page *user_page, *page;
1955 char * uninitialized_var(buf);
1956 char *dst;
1957 void *fsdata;
1958
1959 /*
1960 * handle partial DIO write. Adjust cur_iov if needed.
1961 */
1962 ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
1963
1964 do {
1965 pos = *ppos;
1966
1967 user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
1968 if (IS_ERR(user_page)) {
1969 ret = PTR_ERR(user_page);
1970 goto out;
1971 }
1972
1973 /* Stay within our page boundaries */
1974 bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
1975 (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
1976 /* Stay within the vector boundary */
1977 bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
1978 /* Stay within count */
1979 bytes = min(bytes, count);
1980
1981 page = NULL;
1982 ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
1983 &page, &fsdata);
1984 if (ret) {
1985 mlog_errno(ret);
1986 goto out;
1987 }
1988
1989 dst = kmap_atomic(page, KM_USER0);
1990 memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
1991 kunmap_atomic(dst, KM_USER0);
1992 flush_dcache_page(page);
1993 ocfs2_put_write_source(user_page);
1994
1995 copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
1996 bytes, page, fsdata);
1997 if (copied < 0) {
1998 mlog_errno(copied);
1999 ret = copied;
2000 goto out;
2001 }
2002
2003 total += copied;
2004 *ppos = pos + copied;
2005 count -= copied;
2006
2007 ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
2008 } while(count);
2009
2010out:
2011 return total ? total : ret;
2012}
2013
2014static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, 1884static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2015 const struct iovec *iov, 1885 const struct iovec *iov,
2016 unsigned long nr_segs, 1886 unsigned long nr_segs,
2017 loff_t pos) 1887 loff_t pos)
2018{ 1888{
2019 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1889 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
2020 int can_do_direct, sync = 0; 1890 int can_do_direct;
2021 ssize_t written = 0; 1891 ssize_t written = 0;
2022 size_t ocount; /* original count */ 1892 size_t ocount; /* original count */
2023 size_t count; /* after file limit checks */ 1893 size_t count; /* after file limit checks */
@@ -2033,12 +1903,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2033 if (iocb->ki_left == 0) 1903 if (iocb->ki_left == 0)
2034 return 0; 1904 return 0;
2035 1905
2036 ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
2037 if (ret)
2038 return ret;
2039
2040 count = ocount;
2041
2042 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 1906 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2043 1907
2044 appending = file->f_flags & O_APPEND ? 1 : 0; 1908 appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2082,33 +1946,23 @@ relock:
2082 rw_level = -1; 1946 rw_level = -1;
2083 1947
2084 direct_io = 0; 1948 direct_io = 0;
2085 sync = 1;
2086 goto relock; 1949 goto relock;
2087 } 1950 }
2088 1951
2089 if (!sync && ((file->f_flags & O_SYNC) || IS_SYNC(inode)))
2090 sync = 1;
2091
2092 /*
2093 * XXX: Is it ok to execute these checks a second time?
2094 */
2095 ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode));
2096 if (ret)
2097 goto out;
2098
2099 /*
2100 * Set pos so that sync_page_range_nolock() below understands
2101 * where to start from. We might've moved it around via the
2102 * calls above. The range we want to actually sync starts from
2103 * *ppos here.
2104 *
2105 */
2106 pos = *ppos;
2107
2108 /* communicate with ocfs2_dio_end_io */ 1952 /* communicate with ocfs2_dio_end_io */
2109 ocfs2_iocb_set_rw_locked(iocb, rw_level); 1953 ocfs2_iocb_set_rw_locked(iocb, rw_level);
2110 1954
2111 if (direct_io) { 1955 if (direct_io) {
1956 ret = generic_segment_checks(iov, &nr_segs, &ocount,
1957 VERIFY_READ);
1958 if (ret)
1959 goto out_dio;
1960
1961 ret = generic_write_checks(file, ppos, &count,
1962 S_ISBLK(inode->i_mode));
1963 if (ret)
1964 goto out_dio;
1965
2112 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1966 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2113 ppos, count, ocount); 1967 ppos, count, ocount);
2114 if (written < 0) { 1968 if (written < 0) {
@@ -2116,14 +1970,8 @@ relock:
2116 goto out_dio; 1970 goto out_dio;
2117 } 1971 }
2118 } else { 1972 } else {
2119 written = ocfs2_file_buffered_write(file, ppos, iov, nr_segs, 1973 written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
2120 count, written); 1974 *ppos);
2121 if (written < 0) {
2122 ret = written;
2123 if (ret != -EFAULT || ret != -ENOSPC)
2124 mlog_errno(ret);
2125 goto out;
2126 }
2127 } 1975 }
2128 1976
2129out_dio: 1977out_dio:
@@ -2153,97 +2001,12 @@ out_sems:
2153 if (have_alloc_sem) 2001 if (have_alloc_sem)
2154 up_read(&inode->i_alloc_sem); 2002 up_read(&inode->i_alloc_sem);
2155 2003
2156 if (written > 0 && sync) {
2157 ssize_t err;
2158
2159 err = sync_page_range_nolock(inode, file->f_mapping, pos, count);
2160 if (err < 0)
2161 written = err;
2162 }
2163
2164 mutex_unlock(&inode->i_mutex); 2004 mutex_unlock(&inode->i_mutex);
2165 2005
2166 mlog_exit(ret); 2006 mlog_exit(ret);
2167 return written ? written : ret; 2007 return written ? written : ret;
2168} 2008}
2169 2009
2170static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
2171 struct pipe_buffer *buf,
2172 struct splice_desc *sd)
2173{
2174 int ret, count;
2175 ssize_t copied = 0;
2176 struct file *file = sd->u.file;
2177 unsigned int offset;
2178 struct page *page = NULL;
2179 void *fsdata;
2180 char *src, *dst;
2181
2182 ret = buf->ops->confirm(pipe, buf);
2183 if (ret)
2184 goto out;
2185
2186 offset = sd->pos & ~PAGE_CACHE_MASK;
2187 count = sd->len;
2188 if (count + offset > PAGE_CACHE_SIZE)
2189 count = PAGE_CACHE_SIZE - offset;
2190
2191 ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
2192 &page, &fsdata);
2193 if (ret) {
2194 mlog_errno(ret);
2195 goto out;
2196 }
2197
2198 src = buf->ops->map(pipe, buf, 1);
2199 dst = kmap_atomic(page, KM_USER1);
2200 memcpy(dst + offset, src + buf->offset, count);
2201 kunmap_atomic(dst, KM_USER1);
2202 buf->ops->unmap(pipe, buf, src);
2203
2204 copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
2205 page, fsdata);
2206 if (copied < 0) {
2207 mlog_errno(copied);
2208 ret = copied;
2209 goto out;
2210 }
2211out:
2212
2213 return copied ? copied : ret;
2214}
2215
2216static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2217 struct file *out,
2218 loff_t *ppos,
2219 size_t len,
2220 unsigned int flags)
2221{
2222 int ret, err;
2223 struct address_space *mapping = out->f_mapping;
2224 struct inode *inode = mapping->host;
2225 struct splice_desc sd = {
2226 .total_len = len,
2227 .flags = flags,
2228 .pos = *ppos,
2229 .u.file = out,
2230 };
2231
2232 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
2233 if (ret > 0) {
2234 *ppos += ret;
2235
2236 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
2237 err = generic_osync_inode(inode, mapping,
2238 OSYNC_METADATA|OSYNC_DATA);
2239 if (err)
2240 ret = err;
2241 }
2242 }
2243
2244 return ret;
2245}
2246
2247static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, 2010static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2248 struct file *out, 2011 struct file *out,
2249 loff_t *ppos, 2012 loff_t *ppos,
@@ -2273,8 +2036,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2273 goto out_unlock; 2036 goto out_unlock;
2274 } 2037 }
2275 2038
2276 /* ok, we're done with i_size and alloc work */ 2039 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
2277 ret = __ocfs2_file_splice_write(pipe, out, ppos, len, flags);
2278 2040
2279out_unlock: 2041out_unlock:
2280 ocfs2_rw_unlock(inode, 1); 2042 ocfs2_rw_unlock(inode, 1);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e5d0953d4db1..78fdfea1a7f8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -492,7 +492,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
492 count = PROC_BLOCK_SIZE; 492 count = PROC_BLOCK_SIZE;
493 493
494 length = -ENOMEM; 494 length = -ENOMEM;
495 if (!(page = __get_free_page(GFP_KERNEL))) 495 if (!(page = __get_free_page(GFP_TEMPORARY)))
496 goto out; 496 goto out;
497 497
498 length = PROC_I(inode)->op.proc_read(task, (char*)page); 498 length = PROC_I(inode)->op.proc_read(task, (char*)page);
@@ -532,7 +532,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
532 goto out; 532 goto out;
533 533
534 ret = -ENOMEM; 534 ret = -ENOMEM;
535 page = (char *)__get_free_page(GFP_USER); 535 page = (char *)__get_free_page(GFP_TEMPORARY);
536 if (!page) 536 if (!page)
537 goto out; 537 goto out;
538 538
@@ -602,7 +602,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
602 goto out; 602 goto out;
603 603
604 copied = -ENOMEM; 604 copied = -ENOMEM;
605 page = (char *)__get_free_page(GFP_USER); 605 page = (char *)__get_free_page(GFP_TEMPORARY);
606 if (!page) 606 if (!page)
607 goto out; 607 goto out;
608 608
@@ -788,7 +788,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
788 /* No partial writes. */ 788 /* No partial writes. */
789 return -EINVAL; 789 return -EINVAL;
790 } 790 }
791 page = (char*)__get_free_page(GFP_USER); 791 page = (char*)__get_free_page(GFP_TEMPORARY);
792 if (!page) 792 if (!page)
793 return -ENOMEM; 793 return -ENOMEM;
794 length = -EFAULT; 794 length = -EFAULT;
@@ -954,7 +954,8 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
954 char __user *buffer, int buflen) 954 char __user *buffer, int buflen)
955{ 955{
956 struct inode * inode; 956 struct inode * inode;
957 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 957 char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
958 char *path;
958 int len; 959 int len;
959 960
960 if (!tmp) 961 if (!tmp)
@@ -1726,7 +1727,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1726 goto out; 1727 goto out;
1727 1728
1728 length = -ENOMEM; 1729 length = -ENOMEM;
1729 page = (char*)__get_free_page(GFP_USER); 1730 page = (char*)__get_free_page(GFP_TEMPORARY);
1730 if (!page) 1731 if (!page)
1731 goto out; 1732 goto out;
1732 1733
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b5e7155d30d8..1bdb62435758 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -74,7 +74,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
74 nbytes = MAX_NON_LFS - pos; 74 nbytes = MAX_NON_LFS - pos;
75 75
76 dp = PDE(inode); 76 dp = PDE(inode);
77 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 77 if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
78 return -ENOMEM; 78 return -ENOMEM;
79 79
80 while ((nbytes > 0) && !eof) { 80 while ((nbytes > 0) && !eof) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 0071939c0095..5de7f874d95c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -229,6 +229,19 @@ static const struct file_operations fragmentation_file_operations = {
229 .release = seq_release, 229 .release = seq_release,
230}; 230};
231 231
232extern struct seq_operations pagetypeinfo_op;
233static int pagetypeinfo_open(struct inode *inode, struct file *file)
234{
235 return seq_open(file, &pagetypeinfo_op);
236}
237
238static const struct file_operations pagetypeinfo_file_ops = {
239 .open = pagetypeinfo_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = seq_release,
243};
244
232extern struct seq_operations zoneinfo_op; 245extern struct seq_operations zoneinfo_op;
233static int zoneinfo_open(struct inode *inode, struct file *file) 246static int zoneinfo_open(struct inode *inode, struct file *file)
234{ 247{
@@ -724,6 +737,7 @@ void __init proc_misc_init(void)
724#endif 737#endif
725#endif 738#endif
726 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 739 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
740 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops);
727 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 741 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
728 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 742 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
729#ifdef CONFIG_BLOCK 743#ifdef CONFIG_BLOCK
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 1bc8d873a9e1..df8bd87e49b7 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -433,16 +433,21 @@ static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
433{ 433{
434 return block_write_full_page(page,qnx4_get_block, wbc); 434 return block_write_full_page(page,qnx4_get_block, wbc);
435} 435}
436
436static int qnx4_readpage(struct file *file, struct page *page) 437static int qnx4_readpage(struct file *file, struct page *page)
437{ 438{
438 return block_read_full_page(page,qnx4_get_block); 439 return block_read_full_page(page,qnx4_get_block);
439} 440}
440static int qnx4_prepare_write(struct file *file, struct page *page, 441
441 unsigned from, unsigned to) 442static int qnx4_write_begin(struct file *file, struct address_space *mapping,
443 loff_t pos, unsigned len, unsigned flags,
444 struct page **pagep, void **fsdata)
442{ 445{
443 struct qnx4_inode_info *qnx4_inode = qnx4_i(page->mapping->host); 446 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
444 return cont_prepare_write(page, from, to, qnx4_get_block, 447 *pagep = NULL;
445 &qnx4_inode->mmu_private); 448 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
449 qnx4_get_block,
450 &qnx4_inode->mmu_private);
446} 451}
447static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 452static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
448{ 453{
@@ -452,8 +457,8 @@ static const struct address_space_operations qnx4_aops = {
452 .readpage = qnx4_readpage, 457 .readpage = qnx4_readpage,
453 .writepage = qnx4_writepage, 458 .writepage = qnx4_writepage,
454 .sync_page = block_sync_page, 459 .sync_page = block_sync_page,
455 .prepare_write = qnx4_prepare_write, 460 .write_begin = qnx4_write_begin,
456 .commit_write = generic_commit_write, 461 .write_end = generic_write_end,
457 .bmap = qnx4_bmap 462 .bmap = qnx4_bmap
458}; 463};
459 464
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 97bdc0b2f9d2..b41a514b0976 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -29,8 +29,8 @@
29 29
30const struct address_space_operations ramfs_aops = { 30const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 31 .readpage = simple_readpage,
32 .prepare_write = simple_prepare_write, 32 .write_begin = simple_write_begin,
33 .commit_write = simple_commit_write, 33 .write_end = simple_write_end,
34 .set_page_dirty = __set_page_dirty_no_writeback, 34 .set_page_dirty = __set_page_dirty_no_writeback,
35}; 35};
36 36
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 237fe8b8e819..0989bc2c2f69 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -29,8 +29,8 @@ static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
29 29
30const struct address_space_operations ramfs_aops = { 30const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 31 .readpage = simple_readpage,
32 .prepare_write = simple_prepare_write, 32 .write_begin = simple_write_begin,
33 .commit_write = simple_commit_write, 33 .write_end = simple_write_end,
34 .set_page_dirty = __set_page_dirty_no_writeback, 34 .set_page_dirty = __set_page_dirty_no_writeback,
35}; 35};
36 36
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2070aeee2a52..a804903d31d1 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -153,608 +153,6 @@ static int reiserfs_sync_file(struct file *p_s_filp,
153 return (n_err < 0) ? -EIO : 0; 153 return (n_err < 0) ? -EIO : 0;
154} 154}
155 155
156/* I really do not want to play with memory shortage right now, so
157 to simplify the code, we are not going to write more than this much pages at
158 a time. This still should considerably improve performance compared to 4k
159 at a time case. This is 32 pages of 4k size. */
160#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE
161
162/* Allocates blocks for a file to fulfil write request.
163 Maps all unmapped but prepared pages from the list.
164 Updates metadata with newly allocated blocknumbers as needed */
165static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */
166 loff_t pos, /* Writing position */
167 int num_pages, /* number of pages write going
168 to touch */
169 int write_bytes, /* amount of bytes to write */
170 struct page **prepared_pages, /* array of
171 prepared pages
172 */
173 int blocks_to_allocate /* Amount of blocks we
174 need to allocate to
175 fit the data into file
176 */
177 )
178{
179 struct cpu_key key; // cpu key of item that we are going to deal with
180 struct item_head *ih; // pointer to item head that we are going to deal with
181 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
182 __le32 *item; // pointer to item we are going to deal with
183 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
184 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
185 reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
186 size_t res; // return value of various functions that we call.
187 int curr_block; // current block used to keep track of unmapped blocks.
188 int i; // loop counter
189 int itempos; // position in item
190 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
191 // first page
192 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
193 __u64 hole_size; // amount of blocks for a file hole, if it needed to be created.
194 int modifying_this_item = 0; // Flag for items traversal code to keep track
195 // of the fact that we already prepared
196 // current block for journal
197 int will_prealloc = 0;
198 RFALSE(!blocks_to_allocate,
199 "green-9004: tried to allocate zero blocks?");
200
201 /* only preallocate if this is a small write */
202 if (REISERFS_I(inode)->i_prealloc_count ||
203 (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
204 blocks_to_allocate <
205 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
206 will_prealloc =
207 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
208
209 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
210 sizeof(b_blocknr_t), GFP_NOFS);
211 if (!allocated_blocks)
212 return -ENOMEM;
213
214 /* First we compose a key to point at the writing position, we want to do
215 that outside of any locking region. */
216 make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
217
218 /* If we came here, it means we absolutely need to open a transaction,
219 since we need to allocate some blocks */
220 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
221 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
222 if (res)
223 goto error_exit;
224 reiserfs_update_inode_transaction(inode);
225
226 /* Look for the in-tree position of our write, need path for block allocator */
227 res = search_for_position_by_key(inode->i_sb, &key, &path);
228 if (res == IO_ERROR) {
229 res = -EIO;
230 goto error_exit;
231 }
232
233 /* Allocate blocks */
234 /* First fill in "hint" structure for block allocator */
235 hint.th = th; // transaction handle.
236 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
237 hint.inode = inode; // Inode is needed by block allocator too.
238 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
239 hint.key = key.on_disk_key; // on disk key of file.
240 hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already.
241 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
242 hint.preallocate = will_prealloc;
243
244 /* Call block allocator to allocate blocks */
245 res =
246 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
247 blocks_to_allocate, blocks_to_allocate);
248 if (res != CARRY_ON) {
249 if (res == NO_DISK_SPACE) {
250 /* We flush the transaction in case of no space. This way some
251 blocks might become free */
252 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
253 res = restart_transaction(th, inode, &path);
254 if (res)
255 goto error_exit;
256
257 /* We might have scheduled, so search again */
258 res =
259 search_for_position_by_key(inode->i_sb, &key,
260 &path);
261 if (res == IO_ERROR) {
262 res = -EIO;
263 goto error_exit;
264 }
265
266 /* update changed info for hint structure. */
267 res =
268 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
269 blocks_to_allocate,
270 blocks_to_allocate);
271 if (res != CARRY_ON) {
272 res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
273 pathrelse(&path);
274 goto error_exit;
275 }
276 } else {
277 res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
278 pathrelse(&path);
279 goto error_exit;
280 }
281 }
282#ifdef __BIG_ENDIAN
283 // Too bad, I have not found any way to convert a given region from
284 // cpu format to little endian format
285 {
286 int i;
287 for (i = 0; i < blocks_to_allocate; i++)
288 allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
289 }
290#endif
291
292 /* Blocks allocating well might have scheduled and tree might have changed,
293 let's search the tree again */
294 /* find where in the tree our write should go */
295 res = search_for_position_by_key(inode->i_sb, &key, &path);
296 if (res == IO_ERROR) {
297 res = -EIO;
298 goto error_exit_free_blocks;
299 }
300
301 bh = get_last_bh(&path); // Get a bufferhead for last element in path.
302 ih = get_ih(&path); // Get a pointer to last item head in path.
303 item = get_item(&path); // Get a pointer to last item in path
304
305 /* Let's see what we have found */
306 if (res != POSITION_FOUND) { /* position not found, this means that we
307 might need to append file with holes
308 first */
309 // Since we are writing past the file's end, we need to find out if
310 // there is a hole that needs to be inserted before our writing
311 // position, and how many blocks it is going to cover (we need to
312 // populate pointers to file blocks representing the hole with zeros)
313
314 {
315 int item_offset = 1;
316 /*
317 * if ih is stat data, its offset is 0 and we don't want to
318 * add 1 to pos in the hole_size calculation
319 */
320 if (is_statdata_le_ih(ih))
321 item_offset = 0;
322 hole_size = (pos + item_offset -
323 (le_key_k_offset
324 (get_inode_item_key_version(inode),
325 &(ih->ih_key)) + op_bytes_number(ih,
326 inode->
327 i_sb->
328 s_blocksize)))
329 >> inode->i_sb->s_blocksize_bits;
330 }
331
332 if (hole_size > 0) {
333 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time.
334 /* area filled with zeroes, to supply as list of zero blocknumbers
335 We allocate it outside of loop just in case loop would spin for
336 several iterations. */
337 char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
338 if (!zeros) {
339 res = -ENOMEM;
340 goto error_exit_free_blocks;
341 }
342 do {
343 to_paste =
344 min_t(__u64, hole_size,
345 MAX_ITEM_LEN(inode->i_sb->
346 s_blocksize) /
347 UNFM_P_SIZE);
348 if (is_indirect_le_ih(ih)) {
349 /* Ok, there is existing indirect item already. Need to append it */
350 /* Calculate position past inserted item */
351 make_cpu_key(&key, inode,
352 le_key_k_offset
353 (get_inode_item_key_version
354 (inode),
355 &(ih->ih_key)) +
356 op_bytes_number(ih,
357 inode->
358 i_sb->
359 s_blocksize),
360 TYPE_INDIRECT, 3);
361 res =
362 reiserfs_paste_into_item(th, &path,
363 &key,
364 inode,
365 (char *)
366 zeros,
367 UNFM_P_SIZE
368 *
369 to_paste);
370 if (res) {
371 kfree(zeros);
372 goto error_exit_free_blocks;
373 }
374 } else if (is_statdata_le_ih(ih)) {
375 /* No existing item, create it */
376 /* item head for new item */
377 struct item_head ins_ih;
378
379 /* create a key for our new item */
380 make_cpu_key(&key, inode, 1,
381 TYPE_INDIRECT, 3);
382
383 /* Create new item head for our new item */
384 make_le_item_head(&ins_ih, &key,
385 key.version, 1,
386 TYPE_INDIRECT,
387 to_paste *
388 UNFM_P_SIZE,
389 0 /* free space */ );
390
391 /* Find where such item should live in the tree */
392 res =
393 search_item(inode->i_sb, &key,
394 &path);
395 if (res != ITEM_NOT_FOUND) {
396 /* item should not exist, otherwise we have error */
397 if (res != -ENOSPC) {
398 reiserfs_warning(inode->
399 i_sb,
400 "green-9008: search_by_key (%K) returned %d",
401 &key,
402 res);
403 }
404 res = -EIO;
405 kfree(zeros);
406 goto error_exit_free_blocks;
407 }
408 res =
409 reiserfs_insert_item(th, &path,
410 &key, &ins_ih,
411 inode,
412 (char *)zeros);
413 } else {
414 reiserfs_panic(inode->i_sb,
415 "green-9011: Unexpected key type %K\n",
416 &key);
417 }
418 if (res) {
419 kfree(zeros);
420 goto error_exit_free_blocks;
421 }
422 /* Now we want to check if transaction is too full, and if it is
423 we restart it. This will also free the path. */
424 if (journal_transaction_should_end
425 (th, th->t_blocks_allocated)) {
426 inode->i_size = cpu_key_k_offset(&key) +
427 (to_paste << inode->i_blkbits);
428 res =
429 restart_transaction(th, inode,
430 &path);
431 if (res) {
432 pathrelse(&path);
433 kfree(zeros);
434 goto error_exit;
435 }
436 }
437
438 /* Well, need to recalculate path and stuff */
439 set_cpu_key_k_offset(&key,
440 cpu_key_k_offset(&key) +
441 (to_paste << inode->
442 i_blkbits));
443 res =
444 search_for_position_by_key(inode->i_sb,
445 &key, &path);
446 if (res == IO_ERROR) {
447 res = -EIO;
448 kfree(zeros);
449 goto error_exit_free_blocks;
450 }
451 bh = get_last_bh(&path);
452 ih = get_ih(&path);
453 item = get_item(&path);
454 hole_size -= to_paste;
455 } while (hole_size);
456 kfree(zeros);
457 }
458 }
459 // Go through existing indirect items first
460 // replace all zeroes with blocknumbers from list
461 // Note that if no corresponding item was found, by previous search,
462 // it means there are no existing in-tree representation for file area
463 // we are going to overwrite, so there is nothing to scan through for holes.
464 for (curr_block = 0, itempos = path.pos_in_item;
465 curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
466 retry:
467
468 if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
469 /* We run out of data in this indirect item, let's look for another
470 one. */
471 /* First if we are already modifying current item, log it */
472 if (modifying_this_item) {
473 journal_mark_dirty(th, inode->i_sb, bh);
474 modifying_this_item = 0;
475 }
476 /* Then set the key to look for a new indirect item (offset of old
477 item is added to old item length */
478 set_cpu_key_k_offset(&key,
479 le_key_k_offset
480 (get_inode_item_key_version(inode),
481 &(ih->ih_key)) +
482 op_bytes_number(ih,
483 inode->i_sb->
484 s_blocksize));
485 /* Search ofor position of new key in the tree. */
486 res =
487 search_for_position_by_key(inode->i_sb, &key,
488 &path);
489 if (res == IO_ERROR) {
490 res = -EIO;
491 goto error_exit_free_blocks;
492 }
493 bh = get_last_bh(&path);
494 ih = get_ih(&path);
495 item = get_item(&path);
496 itempos = path.pos_in_item;
497 continue; // loop to check all kinds of conditions and so on.
498 }
499 /* Ok, we have correct position in item now, so let's see if it is
500 representing file hole (blocknumber is zero) and fill it if needed */
501 if (!item[itempos]) {
502 /* Ok, a hole. Now we need to check if we already prepared this
503 block to be journaled */
504 while (!modifying_this_item) { // loop until succeed
505 /* Well, this item is not journaled yet, so we must prepare
506 it for journal first, before we can change it */
507 struct item_head tmp_ih; // We copy item head of found item,
508 // here to detect if fs changed under
509 // us while we were preparing for
510 // journal.
511 int fs_gen; // We store fs generation here to find if someone
512 // changes fs under our feet
513
514 copy_item_head(&tmp_ih, ih); // Remember itemhead
515 fs_gen = get_generation(inode->i_sb); // remember fs generation
516 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
517 if (fs_changed(fs_gen, inode->i_sb)
518 && item_moved(&tmp_ih, &path)) {
519 // Sigh, fs was changed under us, we need to look for new
520 // location of item we are working with
521
522 /* unmark prepaerd area as journaled and search for it's
523 new position */
524 reiserfs_restore_prepared_buffer(inode->
525 i_sb,
526 bh);
527 res =
528 search_for_position_by_key(inode->
529 i_sb,
530 &key,
531 &path);
532 if (res == IO_ERROR) {
533 res = -EIO;
534 goto error_exit_free_blocks;
535 }
536 bh = get_last_bh(&path);
537 ih = get_ih(&path);
538 item = get_item(&path);
539 itempos = path.pos_in_item;
540 goto retry;
541 }
542 modifying_this_item = 1;
543 }
544 item[itempos] = allocated_blocks[curr_block]; // Assign new block
545 curr_block++;
546 }
547 itempos++;
548 }
549
550 if (modifying_this_item) { // We need to log last-accessed block, if it
551 // was modified, but not logged yet.
552 journal_mark_dirty(th, inode->i_sb, bh);
553 }
554
555 if (curr_block < blocks_to_allocate) {
556 // Oh, well need to append to indirect item, or to create indirect item
557 // if there weren't any
558 if (is_indirect_le_ih(ih)) {
559 // Existing indirect item - append. First calculate key for append
560 // position. We do not need to recalculate path as it should
561 // already point to correct place.
562 make_cpu_key(&key, inode,
563 le_key_k_offset(get_inode_item_key_version
564 (inode),
565 &(ih->ih_key)) +
566 op_bytes_number(ih,
567 inode->i_sb->s_blocksize),
568 TYPE_INDIRECT, 3);
569 res =
570 reiserfs_paste_into_item(th, &path, &key, inode,
571 (char *)(allocated_blocks +
572 curr_block),
573 UNFM_P_SIZE *
574 (blocks_to_allocate -
575 curr_block));
576 if (res) {
577 goto error_exit_free_blocks;
578 }
579 } else if (is_statdata_le_ih(ih)) {
580 // Last found item was statdata. That means we need to create indirect item.
581 struct item_head ins_ih; /* itemhead for new item */
582
583 /* create a key for our new item */
584 make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one,
585 // because that's
586 // where first
587 // indirect item
588 // begins
589 /* Create new item head for our new item */
590 make_le_item_head(&ins_ih, &key, key.version, 1,
591 TYPE_INDIRECT,
592 (blocks_to_allocate -
593 curr_block) * UNFM_P_SIZE,
594 0 /* free space */ );
595 /* Find where such item should live in the tree */
596 res = search_item(inode->i_sb, &key, &path);
597 if (res != ITEM_NOT_FOUND) {
598 /* Well, if we have found such item already, or some error
599 occured, we need to warn user and return error */
600 if (res != -ENOSPC) {
601 reiserfs_warning(inode->i_sb,
602 "green-9009: search_by_key (%K) "
603 "returned %d", &key,
604 res);
605 }
606 res = -EIO;
607 goto error_exit_free_blocks;
608 }
609 /* Insert item into the tree with the data as its body */
610 res =
611 reiserfs_insert_item(th, &path, &key, &ins_ih,
612 inode,
613 (char *)(allocated_blocks +
614 curr_block));
615 } else {
616 reiserfs_panic(inode->i_sb,
617 "green-9010: unexpected item type for key %K\n",
618 &key);
619 }
620 }
621 // the caller is responsible for closing the transaction
622 // unless we return an error, they are also responsible for logging
623 // the inode.
624 //
625 pathrelse(&path);
626 /*
627 * cleanup prellocation from previous writes
628 * if this is a partial block write
629 */
630 if (write_bytes & (inode->i_sb->s_blocksize - 1))
631 reiserfs_discard_prealloc(th, inode);
632 reiserfs_write_unlock(inode->i_sb);
633
634 // go through all the pages/buffers and map the buffers to newly allocated
635 // blocks (so that system knows where to write these pages later).
636 curr_block = 0;
637 for (i = 0; i < num_pages; i++) {
638 struct page *page = prepared_pages[i]; //current page
639 struct buffer_head *head = page_buffers(page); // first buffer for a page
640 int block_start, block_end; // in-page offsets for buffers.
641
642 if (!page_buffers(page))
643 reiserfs_panic(inode->i_sb,
644 "green-9005: No buffers for prepared page???");
645
646 /* For each buffer in page */
647 for (bh = head, block_start = 0; bh != head || !block_start;
648 block_start = block_end, bh = bh->b_this_page) {
649 if (!bh)
650 reiserfs_panic(inode->i_sb,
651 "green-9006: Allocated but absent buffer for a page?");
652 block_end = block_start + inode->i_sb->s_blocksize;
653 if (i == 0 && block_end <= from)
654 /* if this buffer is before requested data to map, skip it */
655 continue;
656 if (i == num_pages - 1 && block_start >= to)
657 /* If this buffer is after requested data to map, abort
658 processing of current page */
659 break;
660
661 if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it
662 map_bh(bh, inode->i_sb,
663 le32_to_cpu(allocated_blocks
664 [curr_block]));
665 curr_block++;
666 set_buffer_new(bh);
667 }
668 }
669 }
670
671 RFALSE(curr_block > blocks_to_allocate,
672 "green-9007: Used too many blocks? weird");
673
674 kfree(allocated_blocks);
675 return 0;
676
677// Need to deal with transaction here.
678 error_exit_free_blocks:
679 pathrelse(&path);
680 // free blocks
681 for (i = 0; i < blocks_to_allocate; i++)
682 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
683 1);
684
685 error_exit:
686 if (th->t_trans_id) {
687 int err;
688 // update any changes we made to blk count
689 mark_inode_dirty(inode);
690 err =
691 journal_end(th, inode->i_sb,
692 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
693 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
694 if (err)
695 res = err;
696 }
697 reiserfs_write_unlock(inode->i_sb);
698 kfree(allocated_blocks);
699
700 return res;
701}
702
703/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
704static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
705 size_t num_pages /* amount of pages */ )
706{
707 int i; // loop counter
708
709 for (i = 0; i < num_pages; i++) {
710 struct page *page = prepared_pages[i];
711
712 try_to_free_buffers(page);
713 unlock_page(page);
714 page_cache_release(page);
715 }
716}
717
718/* This function will copy data from userspace to specified pages within
719 supplied byte range */
720static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */
721 int num_pages, /* Number of pages affected */
722 int write_bytes, /* Amount of bytes to write */
723 struct page **prepared_pages, /* pointer to
724 array to
725 prepared pages
726 */
727 const char __user * buf /* Pointer to user-supplied
728 data */
729 )
730{
731 long page_fault = 0; // status of copy_from_user.
732 int i; // loop counter.
733 int offset; // offset in page
734
735 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
736 i++, offset = 0) {
737 size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
738 struct page *page = prepared_pages[i]; // Current page we process.
739
740 fault_in_pages_readable(buf, count);
741
742 /* Copy data from userspace to the current page */
743 kmap(page);
744 page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data.
745 /* Flush processor's dcache for this page */
746 flush_dcache_page(page);
747 kunmap(page);
748 buf += count;
749 write_bytes -= count;
750
751 if (page_fault)
752 break; // Was there a fault? abort.
753 }
754
755 return page_fault ? -EFAULT : 0;
756}
757
758/* taken fs/buffer.c:__block_commit_write */ 156/* taken fs/buffer.c:__block_commit_write */
759int reiserfs_commit_page(struct inode *inode, struct page *page, 157int reiserfs_commit_page(struct inode *inode, struct page *page,
760 unsigned from, unsigned to) 158 unsigned from, unsigned to)
@@ -824,432 +222,6 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
824 return ret; 222 return ret;
825} 223}
826 224
827/* Submit pages for write. This was separated from actual file copying
828 because we might want to allocate block numbers in-between.
829 This function assumes that caller will adjust file size to correct value. */
830static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */
831 size_t num_pages, /* Number of pages to write */
832 size_t write_bytes, /* number of bytes to write */
833 struct page **prepared_pages /* list of pages */
834 )
835{
836 int status; // return status of block_commit_write.
837 int retval = 0; // Return value we are going to return.
838 int i; // loop counter
839 int offset; // Writing offset in page.
840 int orig_write_bytes = write_bytes;
841 int sd_update = 0;
842
843 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
844 i++, offset = 0) {
845 int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
846 struct page *page = prepared_pages[i]; // Current page we process.
847
848 status =
849 reiserfs_commit_page(inode, page, offset, offset + count);
850 if (status)
851 retval = status; // To not overcomplicate matters We are going to
852 // submit all the pages even if there was error.
853 // we only remember error status to report it on
854 // exit.
855 write_bytes -= count;
856 }
857 /* now that we've gotten all the ordered buffers marked dirty,
858 * we can safely update i_size and close any running transaction
859 */
860 if (pos + orig_write_bytes > inode->i_size) {
861 inode->i_size = pos + orig_write_bytes; // Set new size
862 /* If the file have grown so much that tail packing is no
863 * longer possible, reset "need to pack" flag */
864 if ((have_large_tails(inode->i_sb) &&
865 inode->i_size > i_block_size(inode) * 4) ||
866 (have_small_tails(inode->i_sb) &&
867 inode->i_size > i_block_size(inode)))
868 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
869 else if ((have_large_tails(inode->i_sb) &&
870 inode->i_size < i_block_size(inode) * 4) ||
871 (have_small_tails(inode->i_sb) &&
872 inode->i_size < i_block_size(inode)))
873 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
874
875 if (th->t_trans_id) {
876 reiserfs_write_lock(inode->i_sb);
877 // this sets the proper flags for O_SYNC to trigger a commit
878 mark_inode_dirty(inode);
879 reiserfs_write_unlock(inode->i_sb);
880 } else {
881 reiserfs_write_lock(inode->i_sb);
882 reiserfs_update_inode_transaction(inode);
883 mark_inode_dirty(inode);
884 reiserfs_write_unlock(inode->i_sb);
885 }
886
887 sd_update = 1;
888 }
889 if (th->t_trans_id) {
890 reiserfs_write_lock(inode->i_sb);
891 if (!sd_update)
892 mark_inode_dirty(inode);
893 status = journal_end(th, th->t_super, th->t_blocks_allocated);
894 if (status)
895 retval = status;
896 reiserfs_write_unlock(inode->i_sb);
897 }
898 th->t_trans_id = 0;
899
900 /*
901 * we have to unlock the pages after updating i_size, otherwise
902 * we race with writepage
903 */
904 for (i = 0; i < num_pages; i++) {
905 struct page *page = prepared_pages[i];
906 unlock_page(page);
907 mark_page_accessed(page);
908 page_cache_release(page);
909 }
910 return retval;
911}
912
913/* Look if passed writing region is going to touch file's tail
914 (if it is present). And if it is, convert the tail to unformatted node */
915static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */
916 loff_t pos, /* Writing position */
917 int write_bytes /* amount of bytes to write */
918 )
919{
920 INITIALIZE_PATH(path); // needed for search_for_position
921 struct cpu_key key; // Key that would represent last touched writing byte.
922 struct item_head *ih; // item header of found block;
923 int res; // Return value of various functions we call.
924 int cont_expand_offset; // We will put offset for generic_cont_expand here
925 // This can be int just because tails are created
926 // only for small files.
927
928/* this embodies a dependency on a particular tail policy */
929 if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
930 /* such a big files do not have tails, so we won't bother ourselves
931 to look for tails, simply return */
932 return 0;
933 }
934
935 reiserfs_write_lock(inode->i_sb);
936 /* find the item containing the last byte to be written, or if
937 * writing past the end of the file then the last item of the
938 * file (and then we check its type). */
939 make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
940 3 /*key length */ );
941 res = search_for_position_by_key(inode->i_sb, &key, &path);
942 if (res == IO_ERROR) {
943 reiserfs_write_unlock(inode->i_sb);
944 return -EIO;
945 }
946 ih = get_ih(&path);
947 res = 0;
948 if (is_direct_le_ih(ih)) {
949 /* Ok, closest item is file tail (tails are stored in "direct"
950 * items), so we need to unpack it. */
951 /* To not overcomplicate matters, we just call generic_cont_expand
952 which will in turn call other stuff and finally will boil down to
953 reiserfs_get_block() that would do necessary conversion. */
954 cont_expand_offset =
955 le_key_k_offset(get_inode_item_key_version(inode),
956 &(ih->ih_key));
957 pathrelse(&path);
958 res = generic_cont_expand(inode, cont_expand_offset);
959 } else
960 pathrelse(&path);
961
962 reiserfs_write_unlock(inode->i_sb);
963 return res;
964}
965
966/* This function locks pages starting from @pos for @inode.
967 @num_pages pages are locked and stored in
968 @prepared_pages array. Also buffers are allocated for these pages.
969 First and last page of the region is read if it is overwritten only
970 partially. If last page did not exist before write (file hole or file
971 append), it is zeroed, then.
972 Returns number of unallocated blocks that should be allocated to cover
973 new file data.*/
974static int reiserfs_prepare_file_region_for_write(struct inode *inode
975 /* Inode of the file */ ,
976 loff_t pos, /* position in the file */
977 size_t num_pages, /* number of pages to
978 prepare */
979 size_t write_bytes, /* Amount of bytes to be
980 overwritten from
981 @pos */
982 struct page **prepared_pages /* pointer to array
983 where to store
984 prepared pages */
985 )
986{
987 int res = 0; // Return values of different functions we call.
988 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
989 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
990 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
991 /* offset of last modified byte in last
992 page */
993 struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
994 int i; // Simple counter
995 int blocks = 0; /* Return value (blocks that should be allocated) */
996 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
997 // of a page.
998 unsigned block_start, block_end; // Starting and ending offsets of current
999 // buffer in the page.
1000 struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if
1001 // Page appeared to be not up
1002 // to date. Note how we have
1003 // at most 2 buffers, this is
1004 // because we at most may
1005 // partially overwrite two
1006 // buffers for one page. One at // the beginning of write area
1007 // and one at the end.
1008 // Everything in the middle gets // overwritten totally.
1009
1010 struct cpu_key key; // cpu key of item that we are going to deal with
1011 struct item_head *ih = NULL; // pointer to item head that we are going to deal with
1012 struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with
1013 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
1014 __le32 *item = NULL; // pointer to item we are going to deal with
1015 int item_pos = -1; /* Position in indirect item */
1016
1017 if (num_pages < 1) {
1018 reiserfs_warning(inode->i_sb,
1019 "green-9001: reiserfs_prepare_file_region_for_write "
1020 "called with zero number of pages to process");
1021 return -EFAULT;
1022 }
1023
1024 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
1025 that nobody would touch these until we release the pages. Then
1026 we'd start to deal with mapping buffers to blocks. */
1027 for (i = 0; i < num_pages; i++) {
1028 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
1029 if (!prepared_pages[i]) {
1030 res = -ENOMEM;
1031 goto failed_page_grabbing;
1032 }
1033 if (!page_has_buffers(prepared_pages[i]))
1034 create_empty_buffers(prepared_pages[i],
1035 inode->i_sb->s_blocksize, 0);
1036 }
1037
1038 /* Let's count amount of blocks for a case where all the blocks
1039 overwritten are new (we will subtract already allocated blocks later) */
1040 if (num_pages > 2)
1041 /* These are fully overwritten pages, so all the blocks in
1042 these pages are counted as needing to be allocated */
1043 blocks =
1044 (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1045
1046 /* count blocks needed for first page (possibly partially written) */
1047 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */
1048
1049 /* Now we account for last page. If last page == first page (we
1050 overwrite only one page), we subtract all the blocks past the
1051 last writing position in a page out of already calculated number
1052 of blocks */
1053 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
1054 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
1055 /* Note how we do not roundup here since partial blocks still
1056 should be allocated */
1057
1058 /* Now if all the write area lies past the file end, no point in
1059 mapping blocks, since there are none, so we just zero out remaining
1060 parts of first and last pages in write area (if needed) */
1061 if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
1062 if (from != 0) /* First page needs to be partially zeroed */
1063 zero_user_page(prepared_pages[0], 0, from, KM_USER0);
1064
1065 if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */
1066 zero_user_page(prepared_pages[num_pages-1], to,
1067 PAGE_CACHE_SIZE - to, KM_USER0);
1068
1069 /* Since all blocks are new - use already calculated value */
1070 return blocks;
1071 }
1072
1073 /* Well, since we write somewhere into the middle of a file, there is
1074 possibility we are writing over some already allocated blocks, so
1075 let's map these blocks and subtract number of such blocks out of blocks
1076 we need to allocate (calculated above) */
1077 /* Mask write position to start on blocksize, we do it out of the
1078 loop for performance reasons */
1079 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
1080 /* Set cpu key to the starting position in a file (on left block boundary) */
1081 make_cpu_key(&key, inode,
1082 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
1083 TYPE_ANY, 3 /*key length */ );
1084
1085 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
1086 for (i = 0; i < num_pages; i++) {
1087
1088 head = page_buffers(prepared_pages[i]);
1089 /* For each buffer in the page */
1090 for (bh = head, block_start = 0; bh != head || !block_start;
1091 block_start = block_end, bh = bh->b_this_page) {
1092 if (!bh)
1093 reiserfs_panic(inode->i_sb,
1094 "green-9002: Allocated but absent buffer for a page?");
1095 /* Find where this buffer ends */
1096 block_end = block_start + inode->i_sb->s_blocksize;
1097 if (i == 0 && block_end <= from)
1098 /* if this buffer is before requested data to map, skip it */
1099 continue;
1100
1101 if (i == num_pages - 1 && block_start >= to) {
1102 /* If this buffer is after requested data to map, abort
1103 processing of current page */
1104 break;
1105 }
1106
1107 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1108 /* This is optimisation for a case where buffer is mapped
1109 and have blocknumber assigned. In case significant amount
1110 of such buffers are present, we may avoid some amount
1111 of search_by_key calls.
1112 Probably it would be possible to move parts of this code
1113 out of BKL, but I am afraid that would overcomplicate code
1114 without any noticeable benefit.
1115 */
1116 item_pos++;
1117 /* Update the key */
1118 set_cpu_key_k_offset(&key,
1119 cpu_key_k_offset(&key) +
1120 inode->i_sb->s_blocksize);
1121 blocks--; // Decrease the amount of blocks that need to be
1122 // allocated
1123 continue; // Go to the next buffer
1124 }
1125
1126 if (!itembuf || /* if first iteration */
1127 item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the
1128 current unformatted_item */
1129 /* Try to find next item */
1130 res =
1131 search_for_position_by_key(inode->i_sb,
1132 &key, &path);
1133 /* Abort if no more items */
1134 if (res != POSITION_FOUND) {
1135 /* make sure later loops don't use this item */
1136 itembuf = NULL;
1137 item = NULL;
1138 break;
1139 }
1140
1141 /* Update information about current indirect item */
1142 itembuf = get_last_bh(&path);
1143 ih = get_ih(&path);
1144 item = get_item(&path);
1145 item_pos = path.pos_in_item;
1146
1147 RFALSE(!is_indirect_le_ih(ih),
1148 "green-9003: indirect item expected");
1149 }
1150
1151 /* See if there is some block associated with the file
1152 at that position, map the buffer to this block */
1153 if (get_block_num(item, item_pos)) {
1154 map_bh(bh, inode->i_sb,
1155 get_block_num(item, item_pos));
1156 blocks--; // Decrease the amount of blocks that need to be
1157 // allocated
1158 }
1159 item_pos++;
1160 /* Update the key */
1161 set_cpu_key_k_offset(&key,
1162 cpu_key_k_offset(&key) +
1163 inode->i_sb->s_blocksize);
1164 }
1165 }
1166 pathrelse(&path); // Free the path
1167 reiserfs_write_unlock(inode->i_sb);
1168
1169 /* Now zero out unmapped buffers for the first and last pages of
1170 write area or issue read requests if page is mapped. */
1171 /* First page, see if it is not uptodate */
1172 if (!PageUptodate(prepared_pages[0])) {
1173 head = page_buffers(prepared_pages[0]);
1174
1175 /* For each buffer in page */
1176 for (bh = head, block_start = 0; bh != head || !block_start;
1177 block_start = block_end, bh = bh->b_this_page) {
1178
1179 if (!bh)
1180 reiserfs_panic(inode->i_sb,
1181 "green-9002: Allocated but absent buffer for a page?");
1182 /* Find where this buffer ends */
1183 block_end = block_start + inode->i_sb->s_blocksize;
1184 if (block_end <= from)
1185 /* if this buffer is before requested data to map, skip it */
1186 continue;
1187 if (block_start < from) { /* Aha, our partial buffer */
1188 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1189 issue READ request for it to
1190 not lose data */
1191 ll_rw_block(READ, 1, &bh);
1192 *wait_bh++ = bh;
1193 } else { /* Not mapped, zero it */
1194 zero_user_page(prepared_pages[0],
1195 block_start,
1196 from - block_start, KM_USER0);
1197 set_buffer_uptodate(bh);
1198 }
1199 }
1200 }
1201 }
1202
1203 /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
1204 if (!PageUptodate(prepared_pages[num_pages - 1]) ||
1205 ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
1206 (inode->i_size >> PAGE_CACHE_SHIFT)) {
1207 head = page_buffers(prepared_pages[num_pages - 1]);
1208
1209 /* for each buffer in page */
1210 for (bh = head, block_start = 0; bh != head || !block_start;
1211 block_start = block_end, bh = bh->b_this_page) {
1212
1213 if (!bh)
1214 reiserfs_panic(inode->i_sb,
1215 "green-9002: Allocated but absent buffer for a page?");
1216 /* Find where this buffer ends */
1217 block_end = block_start + inode->i_sb->s_blocksize;
1218 if (block_start >= to)
1219 /* if this buffer is after requested data to map, skip it */
1220 break;
1221 if (block_end > to) { /* Aha, our partial buffer */
1222 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1223 issue READ request for it to
1224 not lose data */
1225 ll_rw_block(READ, 1, &bh);
1226 *wait_bh++ = bh;
1227 } else { /* Not mapped, zero it */
1228 zero_user_page(prepared_pages[num_pages-1],
1229 to, block_end - to, KM_USER0);
1230 set_buffer_uptodate(bh);
1231 }
1232 }
1233 }
1234 }
1235
1236 /* Wait for read requests we made to happen, if necessary */
1237 while (wait_bh > wait) {
1238 wait_on_buffer(*--wait_bh);
1239 if (!buffer_uptodate(*wait_bh)) {
1240 res = -EIO;
1241 goto failed_read;
1242 }
1243 }
1244
1245 return blocks;
1246 failed_page_grabbing:
1247 num_pages = i;
1248 failed_read:
1249 reiserfs_unprepare_pages(prepared_pages, num_pages);
1250 return res;
1251}
1252
1253/* Write @count bytes at position @ppos in a file indicated by @file 225/* Write @count bytes at position @ppos in a file indicated by @file
1254 from the buffer @buf. 226 from the buffer @buf.
1255 227
@@ -1284,14 +256,9 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1284 * new current position before returning. */ 256 * new current position before returning. */
1285 ) 257 )
1286{ 258{
1287 size_t already_written = 0; // Number of bytes already written to the file.
1288 loff_t pos; // Current position in the file.
1289 ssize_t res; // return value of various functions that we call.
1290 int err = 0;
1291 struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. 259 struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to.
1292 /* To simplify coding at this time, we store 260 /* To simplify coding at this time, we store
1293 locked pages in array for now */ 261 locked pages in array for now */
1294 struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
1295 struct reiserfs_transaction_handle th; 262 struct reiserfs_transaction_handle th;
1296 th.t_trans_id = 0; 263 th.t_trans_id = 0;
1297 264
@@ -1311,212 +278,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1311 count = MAX_NON_LFS - (unsigned long)*ppos; 278 count = MAX_NON_LFS - (unsigned long)*ppos;
1312 } 279 }
1313 280
1314 if (file->f_flags & O_DIRECT) 281 return do_sync_write(file, buf, count, ppos);
1315 return do_sync_write(file, buf, count, ppos);
1316
1317 if (unlikely((ssize_t) count < 0))
1318 return -EINVAL;
1319
1320 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1321 return -EFAULT;
1322
1323 mutex_lock(&inode->i_mutex); // locks the entire file for just us
1324
1325 pos = *ppos;
1326
1327 /* Check if we can write to specified region of file, file
1328 is not overly big and this kind of stuff. Adjust pos and
1329 count, if needed */
1330 res = generic_write_checks(file, &pos, &count, 0);
1331 if (res)
1332 goto out;
1333
1334 if (count == 0)
1335 goto out;
1336
1337 res = remove_suid(file->f_path.dentry);
1338 if (res)
1339 goto out;
1340
1341 file_update_time(file);
1342
1343 // Ok, we are done with all the checks.
1344
1345 // Now we should start real work
1346
1347 /* If we are going to write past the file's packed tail or if we are going
1348 to overwrite part of the tail, we need that tail to be converted into
1349 unformatted node */
1350 res = reiserfs_check_for_tail_and_convert(inode, pos, count);
1351 if (res)
1352 goto out;
1353
1354 while (count > 0) {
1355 /* This is the main loop in which we keep running until some error occurs
1356 or until we write all of the data. */
1357 size_t num_pages; /* amount of pages we are going to write this iteration */
1358 size_t write_bytes; /* amount of bytes to write during this iteration */
1359 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
1360
1361 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
1362 num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
1363 pages */
1364 ((count +
1365 (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
1366 /* convert size to amount of
1367 pages */
1368 reiserfs_write_lock(inode->i_sb);
1369 if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
1370 || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
1371 /* If we were asked to write more data than we want to or if there
1372 is not that much space, then we shorten amount of data to write
1373 for this iteration. */
1374 num_pages =
1375 min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
1376 reiserfs_can_fit_pages(inode->i_sb));
1377 /* Also we should not forget to set size in bytes accordingly */
1378 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1379 (pos & (PAGE_CACHE_SIZE - 1));
1380 /* If position is not on the
1381 start of the page, we need
1382 to substract the offset
1383 within page */
1384 } else
1385 write_bytes = count;
1386
1387 /* reserve the blocks to be allocated later, so that later on
1388 we still have the space to write the blocks to */
1389 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1390 num_pages <<
1391 (PAGE_CACHE_SHIFT -
1392 inode->i_blkbits));
1393 reiserfs_write_unlock(inode->i_sb);
1394
1395 if (!num_pages) { /* If we do not have enough space even for a single page... */
1396 if (pos >
1397 inode->i_size + inode->i_sb->s_blocksize -
1398 (pos & (inode->i_sb->s_blocksize - 1))) {
1399 res = -ENOSPC;
1400 break; // In case we are writing past the end of the last file block, break.
1401 }
1402 // Otherwise we are possibly overwriting the file, so
1403 // let's set write size to be equal or less than blocksize.
1404 // This way we get it correctly for file holes.
1405 // But overwriting files on absolutelly full volumes would not
1406 // be very efficient. Well, people are not supposed to fill
1407 // 100% of disk space anyway.
1408 write_bytes =
1409 min_t(size_t, count,
1410 inode->i_sb->s_blocksize -
1411 (pos & (inode->i_sb->s_blocksize - 1)));
1412 num_pages = 1;
1413 // No blocks were claimed before, so do it now.
1414 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1415 1 <<
1416 (PAGE_CACHE_SHIFT
1417 -
1418 inode->
1419 i_blkbits));
1420 }
1421
1422 /* Prepare for writing into the region, read in all the
1423 partially overwritten pages, if needed. And lock the pages,
1424 so that nobody else can access these until we are done.
1425 We get number of actual blocks needed as a result. */
1426 res = reiserfs_prepare_file_region_for_write(inode, pos,
1427 num_pages,
1428 write_bytes,
1429 prepared_pages);
1430 if (res < 0) {
1431 reiserfs_release_claimed_blocks(inode->i_sb,
1432 num_pages <<
1433 (PAGE_CACHE_SHIFT -
1434 inode->i_blkbits));
1435 break;
1436 }
1437
1438 blocks_to_allocate = res;
1439
1440 /* First we correct our estimate of how many blocks we need */
1441 reiserfs_release_claimed_blocks(inode->i_sb,
1442 (num_pages <<
1443 (PAGE_CACHE_SHIFT -
1444 inode->i_sb->
1445 s_blocksize_bits)) -
1446 blocks_to_allocate);
1447
1448 if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */
1449 /* Fill in all the possible holes and append the file if needed */
1450 res =
1451 reiserfs_allocate_blocks_for_region(&th, inode, pos,
1452 num_pages,
1453 write_bytes,
1454 prepared_pages,
1455 blocks_to_allocate);
1456 }
1457
1458 /* well, we have allocated the blocks, so it is time to free
1459 the reservation we made earlier. */
1460 reiserfs_release_claimed_blocks(inode->i_sb,
1461 blocks_to_allocate);
1462 if (res) {
1463 reiserfs_unprepare_pages(prepared_pages, num_pages);
1464 break;
1465 }
1466
1467/* NOTE that allocating blocks and filling blocks can be done in reverse order
1468 and probably we would do that just to get rid of garbage in files after a
1469 crash */
1470
1471 /* Copy data from user-supplied buffer to file's pages */
1472 res =
1473 reiserfs_copy_from_user_to_file_region(pos, num_pages,
1474 write_bytes,
1475 prepared_pages, buf);
1476 if (res) {
1477 reiserfs_unprepare_pages(prepared_pages, num_pages);
1478 break;
1479 }
1480
1481 /* Send the pages to disk and unlock them. */
1482 res =
1483 reiserfs_submit_file_region_for_write(&th, inode, pos,
1484 num_pages,
1485 write_bytes,
1486 prepared_pages);
1487 if (res)
1488 break;
1489
1490 already_written += write_bytes;
1491 buf += write_bytes;
1492 *ppos = pos += write_bytes;
1493 count -= write_bytes;
1494 balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
1495 }
1496
1497 /* this is only true on error */
1498 if (th.t_trans_id) {
1499 reiserfs_write_lock(inode->i_sb);
1500 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1501 reiserfs_write_unlock(inode->i_sb);
1502 if (err) {
1503 res = err;
1504 goto out;
1505 }
1506 }
1507
1508 if (likely(res >= 0) &&
1509 (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
1510 res = generic_osync_inode(inode, file->f_mapping,
1511 OSYNC_METADATA | OSYNC_DATA);
1512
1513 mutex_unlock(&inode->i_mutex);
1514 reiserfs_async_progress_wait(inode->i_sb);
1515 return (already_written != 0) ? already_written : res;
1516
1517 out:
1518 mutex_unlock(&inode->i_mutex); // unlock the file on exit.
1519 return res;
1520} 282}
1521 283
1522const struct file_operations reiserfs_file_operations = { 284const struct file_operations reiserfs_file_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ddde489f1cb2..95051d44a918 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -17,11 +17,12 @@
17#include <linux/mpage.h> 17#include <linux/mpage.h>
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/swap.h>
20 21
21static int reiserfs_commit_write(struct file *f, struct page *page, 22int reiserfs_commit_write(struct file *f, struct page *page,
22 unsigned from, unsigned to); 23 unsigned from, unsigned to);
23static int reiserfs_prepare_write(struct file *f, struct page *page, 24int reiserfs_prepare_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 25 unsigned from, unsigned to);
25 26
26void reiserfs_delete_inode(struct inode *inode) 27void reiserfs_delete_inode(struct inode *inode)
27{ 28{
@@ -2550,8 +2551,78 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2550 return reiserfs_write_full_page(page, wbc); 2551 return reiserfs_write_full_page(page, wbc);
2551} 2552}
2552 2553
2553static int reiserfs_prepare_write(struct file *f, struct page *page, 2554static int reiserfs_write_begin(struct file *file,
2554 unsigned from, unsigned to) 2555 struct address_space *mapping,
2556 loff_t pos, unsigned len, unsigned flags,
2557 struct page **pagep, void **fsdata)
2558{
2559 struct inode *inode;
2560 struct page *page;
2561 pgoff_t index;
2562 int ret;
2563 int old_ref = 0;
2564
2565 inode = mapping->host;
2566 *fsdata = 0;
2567 if (flags & AOP_FLAG_CONT_EXPAND &&
2568 (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2569 pos ++;
2570 *fsdata = (void *)(unsigned long)flags;
2571 }
2572
2573 index = pos >> PAGE_CACHE_SHIFT;
2574 page = __grab_cache_page(mapping, index);
2575 if (!page)
2576 return -ENOMEM;
2577 *pagep = page;
2578
2579 reiserfs_wait_on_write_block(inode->i_sb);
2580 fix_tail_page_for_writing(page);
2581 if (reiserfs_transaction_running(inode->i_sb)) {
2582 struct reiserfs_transaction_handle *th;
2583 th = (struct reiserfs_transaction_handle *)current->
2584 journal_info;
2585 BUG_ON(!th->t_refcount);
2586 BUG_ON(!th->t_trans_id);
2587 old_ref = th->t_refcount;
2588 th->t_refcount++;
2589 }
2590 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2591 reiserfs_get_block);
2592 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2593 struct reiserfs_transaction_handle *th = current->journal_info;
2594 /* this gets a little ugly. If reiserfs_get_block returned an
2595 * error and left a transacstion running, we've got to close it,
2596 * and we've got to free handle if it was a persistent transaction.
2597 *
2598 * But, if we had nested into an existing transaction, we need
2599 * to just drop the ref count on the handle.
2600 *
2601 * If old_ref == 0, the transaction is from reiserfs_get_block,
2602 * and it was a persistent trans. Otherwise, it was nested above.
2603 */
2604 if (th->t_refcount > old_ref) {
2605 if (old_ref)
2606 th->t_refcount--;
2607 else {
2608 int err;
2609 reiserfs_write_lock(inode->i_sb);
2610 err = reiserfs_end_persistent_transaction(th);
2611 reiserfs_write_unlock(inode->i_sb);
2612 if (err)
2613 ret = err;
2614 }
2615 }
2616 }
2617 if (ret) {
2618 unlock_page(page);
2619 page_cache_release(page);
2620 }
2621 return ret;
2622}
2623
2624int reiserfs_prepare_write(struct file *f, struct page *page,
2625 unsigned from, unsigned to)
2555{ 2626{
2556 struct inode *inode = page->mapping->host; 2627 struct inode *inode = page->mapping->host;
2557 int ret; 2628 int ret;
@@ -2604,8 +2675,102 @@ static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2604 return generic_block_bmap(as, block, reiserfs_bmap); 2675 return generic_block_bmap(as, block, reiserfs_bmap);
2605} 2676}
2606 2677
2607static int reiserfs_commit_write(struct file *f, struct page *page, 2678static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2608 unsigned from, unsigned to) 2679 loff_t pos, unsigned len, unsigned copied,
2680 struct page *page, void *fsdata)
2681{
2682 struct inode *inode = page->mapping->host;
2683 int ret = 0;
2684 int update_sd = 0;
2685 struct reiserfs_transaction_handle *th;
2686 unsigned start;
2687
2688 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2689 pos ++;
2690
2691 reiserfs_wait_on_write_block(inode->i_sb);
2692 if (reiserfs_transaction_running(inode->i_sb))
2693 th = current->journal_info;
2694 else
2695 th = NULL;
2696
2697 start = pos & (PAGE_CACHE_SIZE - 1);
2698 if (unlikely(copied < len)) {
2699 if (!PageUptodate(page))
2700 copied = 0;
2701
2702 page_zero_new_buffers(page, start + copied, start + len);
2703 }
2704 flush_dcache_page(page);
2705
2706 reiserfs_commit_page(inode, page, start, start + copied);
2707
2708 /* generic_commit_write does this for us, but does not update the
2709 ** transaction tracking stuff when the size changes. So, we have
2710 ** to do the i_size updates here.
2711 */
2712 pos += copied;
2713 if (pos > inode->i_size) {
2714 struct reiserfs_transaction_handle myth;
2715 reiserfs_write_lock(inode->i_sb);
2716 /* If the file has grown beyond the border where it
2717 can have a tail, unmark it as needing a tail
2718 packing */
2719 if ((have_large_tails(inode->i_sb)
2720 && inode->i_size > i_block_size(inode) * 4)
2721 || (have_small_tails(inode->i_sb)
2722 && inode->i_size > i_block_size(inode)))
2723 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2724
2725 ret = journal_begin(&myth, inode->i_sb, 1);
2726 if (ret) {
2727 reiserfs_write_unlock(inode->i_sb);
2728 goto journal_error;
2729 }
2730 reiserfs_update_inode_transaction(inode);
2731 inode->i_size = pos;
2732 /*
2733 * this will just nest into our transaction. It's important
2734 * to use mark_inode_dirty so the inode gets pushed around on the
2735 * dirty lists, and so that O_SYNC works as expected
2736 */
2737 mark_inode_dirty(inode);
2738 reiserfs_update_sd(&myth, inode);
2739 update_sd = 1;
2740 ret = journal_end(&myth, inode->i_sb, 1);
2741 reiserfs_write_unlock(inode->i_sb);
2742 if (ret)
2743 goto journal_error;
2744 }
2745 if (th) {
2746 reiserfs_write_lock(inode->i_sb);
2747 if (!update_sd)
2748 mark_inode_dirty(inode);
2749 ret = reiserfs_end_persistent_transaction(th);
2750 reiserfs_write_unlock(inode->i_sb);
2751 if (ret)
2752 goto out;
2753 }
2754
2755 out:
2756 unlock_page(page);
2757 page_cache_release(page);
2758 return ret == 0 ? copied : ret;
2759
2760 journal_error:
2761 if (th) {
2762 reiserfs_write_lock(inode->i_sb);
2763 if (!update_sd)
2764 reiserfs_update_sd(th, inode);
2765 ret = reiserfs_end_persistent_transaction(th);
2766 reiserfs_write_unlock(inode->i_sb);
2767 }
2768
2769 goto out;
2770}
2771
2772int reiserfs_commit_write(struct file *f, struct page *page,
2773 unsigned from, unsigned to)
2609{ 2774{
2610 struct inode *inode = page->mapping->host; 2775 struct inode *inode = page->mapping->host;
2611 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; 2776 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
@@ -2909,7 +3074,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2909 } 3074 }
2910 /* fill in hole pointers in the expanding truncate case. */ 3075 /* fill in hole pointers in the expanding truncate case. */
2911 if (attr->ia_size > inode->i_size) { 3076 if (attr->ia_size > inode->i_size) {
2912 error = generic_cont_expand(inode, attr->ia_size); 3077 error = generic_cont_expand_simple(inode, attr->ia_size);
2913 if (REISERFS_I(inode)->i_prealloc_count > 0) { 3078 if (REISERFS_I(inode)->i_prealloc_count > 0) {
2914 int err; 3079 int err;
2915 struct reiserfs_transaction_handle th; 3080 struct reiserfs_transaction_handle th;
@@ -2999,8 +3164,8 @@ const struct address_space_operations reiserfs_address_space_operations = {
2999 .releasepage = reiserfs_releasepage, 3164 .releasepage = reiserfs_releasepage,
3000 .invalidatepage = reiserfs_invalidatepage, 3165 .invalidatepage = reiserfs_invalidatepage,
3001 .sync_page = block_sync_page, 3166 .sync_page = block_sync_page,
3002 .prepare_write = reiserfs_prepare_write, 3167 .write_begin = reiserfs_write_begin,
3003 .commit_write = reiserfs_commit_write, 3168 .write_end = reiserfs_write_end,
3004 .bmap = reiserfs_aop_bmap, 3169 .bmap = reiserfs_aop_bmap,
3005 .direct_IO = reiserfs_direct_IO, 3170 .direct_IO = reiserfs_direct_IO,
3006 .set_page_dirty = reiserfs_set_page_dirty, 3171 .set_page_dirty = reiserfs_set_page_dirty,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 11a0fcc2d402..c438a8f83f26 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -128,6 +128,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
128} 128}
129#endif 129#endif
130 130
131int reiserfs_commit_write(struct file *f, struct page *page,
132 unsigned from, unsigned to);
133int reiserfs_prepare_write(struct file *f, struct page *page,
134 unsigned from, unsigned to);
131/* 135/*
132** reiserfs_unpack 136** reiserfs_unpack
133** Function try to convert tail from direct item into indirect. 137** Function try to convert tail from direct item into indirect.
@@ -175,15 +179,13 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
175 if (!page) { 179 if (!page) {
176 goto out; 180 goto out;
177 } 181 }
178 retval = 182 retval = reiserfs_prepare_write(NULL, page, write_from, write_from);
179 mapping->a_ops->prepare_write(NULL, page, write_from, write_from);
180 if (retval) 183 if (retval)
181 goto out_unlock; 184 goto out_unlock;
182 185
183 /* conversion can change page contents, must flush */ 186 /* conversion can change page contents, must flush */
184 flush_dcache_page(page); 187 flush_dcache_page(page);
185 retval = 188 retval = reiserfs_commit_write(NULL, page, write_from, write_from);
186 mapping->a_ops->commit_write(NULL, page, write_from, write_from);
187 REISERFS_I(inode)->i_flags |= i_nopack_mask; 189 REISERFS_I(inode)->i_flags |= i_nopack_mask;
188 190
189 out_unlock: 191 out_unlock:
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index bf6e58214538..fab4b9b2664f 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -426,6 +426,12 @@ static inline __u32 xattr_hash(const char *msg, int len)
426 return csum_partial(msg, len, 0); 426 return csum_partial(msg, len, 0);
427} 427}
428 428
429int reiserfs_commit_write(struct file *f, struct page *page,
430 unsigned from, unsigned to);
431int reiserfs_prepare_write(struct file *f, struct page *page,
432 unsigned from, unsigned to);
433
434
429/* Generic extended attribute operations that can be used by xa plugins */ 435/* Generic extended attribute operations that can be used by xa plugins */
430 436
431/* 437/*
@@ -512,15 +518,15 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
512 rxh->h_hash = cpu_to_le32(xahash); 518 rxh->h_hash = cpu_to_le32(xahash);
513 } 519 }
514 520
515 err = mapping->a_ops->prepare_write(fp, page, page_offset, 521 err = reiserfs_prepare_write(fp, page, page_offset,
516 page_offset + chunk + skip); 522 page_offset + chunk + skip);
517 if (!err) { 523 if (!err) {
518 if (buffer) 524 if (buffer)
519 memcpy(data + skip, buffer + buffer_pos, chunk); 525 memcpy(data + skip, buffer + buffer_pos, chunk);
520 err = 526 err =
521 mapping->a_ops->commit_write(fp, page, page_offset, 527 reiserfs_commit_write(fp, page, page_offset,
522 page_offset + chunk + 528 page_offset + chunk +
523 skip); 529 skip);
524 } 530 }
525 unlock_page(page); 531 unlock_page(page);
526 reiserfs_put_page(page); 532 reiserfs_put_page(page);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index c5d78a7e492b..f5d14cebc75a 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -292,29 +292,45 @@ out:
292 * If the writer ends up delaying the write, the writer needs to 292 * If the writer ends up delaying the write, the writer needs to
293 * increment the page use counts until he is done with the page. 293 * increment the page use counts until he is done with the page.
294 */ 294 */
295static int smb_prepare_write(struct file *file, struct page *page, 295static int smb_write_begin(struct file *file, struct address_space *mapping,
296 unsigned offset, unsigned to) 296 loff_t pos, unsigned len, unsigned flags,
297 struct page **pagep, void **fsdata)
297{ 298{
299 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
300 *pagep = __grab_cache_page(mapping, index);
301 if (!*pagep)
302 return -ENOMEM;
298 return 0; 303 return 0;
299} 304}
300 305
301static int smb_commit_write(struct file *file, struct page *page, 306static int smb_write_end(struct file *file, struct address_space *mapping,
302 unsigned offset, unsigned to) 307 loff_t pos, unsigned len, unsigned copied,
308 struct page *page, void *fsdata)
303{ 309{
304 int status; 310 int status;
311 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
305 312
306 status = -EFAULT;
307 lock_kernel(); 313 lock_kernel();
308 status = smb_updatepage(file, page, offset, to-offset); 314 status = smb_updatepage(file, page, offset, copied);
309 unlock_kernel(); 315 unlock_kernel();
316
317 if (!status) {
318 if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
319 SetPageUptodate(page);
320 status = copied;
321 }
322
323 unlock_page(page);
324 page_cache_release(page);
325
310 return status; 326 return status;
311} 327}
312 328
313const struct address_space_operations smb_file_aops = { 329const struct address_space_operations smb_file_aops = {
314 .readpage = smb_readpage, 330 .readpage = smb_readpage,
315 .writepage = smb_writepage, 331 .writepage = smb_writepage,
316 .prepare_write = smb_prepare_write, 332 .write_begin = smb_write_begin,
317 .commit_write = smb_commit_write 333 .write_end = smb_write_end,
318}; 334};
319 335
320/* 336/*
diff --git a/fs/splice.c b/fs/splice.c
index 02c39ae719b6..59a941d404d9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -447,7 +447,7 @@ fill_it:
447 */ 447 */
448 while (page_nr < nr_pages) 448 while (page_nr < nr_pages)
449 page_cache_release(pages[page_nr++]); 449 page_cache_release(pages[page_nr++]);
450 in->f_ra.prev_index = index; 450 in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
451 451
452 if (spd.nr_pages) 452 if (spd.nr_pages)
453 return splice_to_pipe(pipe, &spd); 453 return splice_to_pipe(pipe, &spd);
@@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
563 struct address_space *mapping = file->f_mapping; 563 struct address_space *mapping = file->f_mapping;
564 unsigned int offset, this_len; 564 unsigned int offset, this_len;
565 struct page *page; 565 struct page *page;
566 pgoff_t index; 566 void *fsdata;
567 int ret; 567 int ret;
568 568
569 /* 569 /*
@@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
573 if (unlikely(ret)) 573 if (unlikely(ret))
574 return ret; 574 return ret;
575 575
576 index = sd->pos >> PAGE_CACHE_SHIFT;
577 offset = sd->pos & ~PAGE_CACHE_MASK; 576 offset = sd->pos & ~PAGE_CACHE_MASK;
578 577
579 this_len = sd->len; 578 this_len = sd->len;
580 if (this_len + offset > PAGE_CACHE_SIZE) 579 if (this_len + offset > PAGE_CACHE_SIZE)
581 this_len = PAGE_CACHE_SIZE - offset; 580 this_len = PAGE_CACHE_SIZE - offset;
582 581
583find_page: 582 ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
584 page = find_lock_page(mapping, index); 583 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
585 if (!page) { 584 if (unlikely(ret))
586 ret = -ENOMEM; 585 goto out;
587 page = page_cache_alloc_cold(mapping);
588 if (unlikely(!page))
589 goto out_ret;
590
591 /*
592 * This will also lock the page
593 */
594 ret = add_to_page_cache_lru(page, mapping, index,
595 GFP_KERNEL);
596 if (unlikely(ret))
597 goto out_release;
598 }
599
600 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
601 if (unlikely(ret)) {
602 loff_t isize = i_size_read(mapping->host);
603
604 if (ret != AOP_TRUNCATED_PAGE)
605 unlock_page(page);
606 page_cache_release(page);
607 if (ret == AOP_TRUNCATED_PAGE)
608 goto find_page;
609
610 /*
611 * prepare_write() may have instantiated a few blocks
612 * outside i_size. Trim these off again.
613 */
614 if (sd->pos + this_len > isize)
615 vmtruncate(mapping->host, isize);
616
617 goto out_ret;
618 }
619 586
620 if (buf->page != page) { 587 if (buf->page != page) {
621 /* 588 /*
@@ -629,31 +596,9 @@ find_page:
629 kunmap_atomic(dst, KM_USER1); 596 kunmap_atomic(dst, KM_USER1);
630 buf->ops->unmap(pipe, buf, src); 597 buf->ops->unmap(pipe, buf, src);
631 } 598 }
632 599 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
633 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); 600 page, fsdata);
634 if (ret) {
635 if (ret == AOP_TRUNCATED_PAGE) {
636 page_cache_release(page);
637 goto find_page;
638 }
639 if (ret < 0)
640 goto out;
641 /*
642 * Partial write has happened, so 'ret' already initialized by
643 * number of bytes written, Where is nothing we have to do here.
644 */
645 } else
646 ret = this_len;
647 /*
648 * Return the number of bytes written and mark page as
649 * accessed, we are now done!
650 */
651 mark_page_accessed(page);
652out: 601out:
653 unlock_page(page);
654out_release:
655 page_cache_release(page);
656out_ret:
657 return ret; 602 return ret;
658} 603}
659 604
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 9236635111f4..c4ef945d39c8 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -24,8 +24,8 @@ extern struct super_block * sysfs_sb;
24 24
25static const struct address_space_operations sysfs_aops = { 25static const struct address_space_operations sysfs_aops = {
26 .readpage = simple_readpage, 26 .readpage = simple_readpage,
27 .prepare_write = simple_prepare_write, 27 .write_begin = simple_write_begin,
28 .commit_write = simple_commit_write 28 .write_end = simple_write_end,
29}; 29};
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index e566b387fcf9..56f655254bfe 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -16,6 +16,7 @@
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
19#include <linux/swap.h>
19#include "sysv.h" 20#include "sysv.h"
20 21
21static int sysv_readdir(struct file *, void *, filldir_t); 22static int sysv_readdir(struct file *, void *, filldir_t);
@@ -37,12 +38,17 @@ static inline unsigned long dir_pages(struct inode *inode)
37 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 38 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
38} 39}
39 40
40static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) 41static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
41{ 42{
42 struct inode *dir = (struct inode *)page->mapping->host; 43 struct address_space *mapping = page->mapping;
44 struct inode *dir = mapping->host;
43 int err = 0; 45 int err = 0;
44 46
45 page->mapping->a_ops->commit_write(NULL, page, from, to); 47 block_write_end(NULL, mapping, pos, len, len, page, NULL);
48 if (pos+len > dir->i_size) {
49 i_size_write(dir, pos+len);
50 mark_inode_dirty(dir);
51 }
46 if (IS_DIRSYNC(dir)) 52 if (IS_DIRSYNC(dir))
47 err = write_one_page(page, 1); 53 err = write_one_page(page, 1);
48 else 54 else
@@ -186,7 +192,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
186 unsigned long npages = dir_pages(dir); 192 unsigned long npages = dir_pages(dir);
187 unsigned long n; 193 unsigned long n;
188 char *kaddr; 194 char *kaddr;
189 unsigned from, to; 195 loff_t pos;
190 int err; 196 int err;
191 197
192 /* We take care of directory expansion in the same loop */ 198 /* We take care of directory expansion in the same loop */
@@ -212,16 +218,17 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
212 return -EINVAL; 218 return -EINVAL;
213 219
214got_it: 220got_it:
215 from = (char*)de - (char*)page_address(page); 221 pos = page_offset(page) +
216 to = from + SYSV_DIRSIZE; 222 (char*)de - (char*)page_address(page);
217 lock_page(page); 223 lock_page(page);
218 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 224 err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE,
225 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
219 if (err) 226 if (err)
220 goto out_unlock; 227 goto out_unlock;
221 memcpy (de->name, name, namelen); 228 memcpy (de->name, name, namelen);
222 memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2); 229 memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
223 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); 230 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
224 err = dir_commit_chunk(page, from, to); 231 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
225 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 232 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
226 mark_inode_dirty(dir); 233 mark_inode_dirty(dir);
227out_page: 234out_page:
@@ -238,15 +245,15 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
238 struct address_space *mapping = page->mapping; 245 struct address_space *mapping = page->mapping;
239 struct inode *inode = (struct inode*)mapping->host; 246 struct inode *inode = (struct inode*)mapping->host;
240 char *kaddr = (char*)page_address(page); 247 char *kaddr = (char*)page_address(page);
241 unsigned from = (char*)de - kaddr; 248 loff_t pos = page_offset(page) + (char *)de - kaddr;
242 unsigned to = from + SYSV_DIRSIZE;
243 int err; 249 int err;
244 250
245 lock_page(page); 251 lock_page(page);
246 err = mapping->a_ops->prepare_write(NULL, page, from, to); 252 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
253 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
247 BUG_ON(err); 254 BUG_ON(err);
248 de->inode = 0; 255 de->inode = 0;
249 err = dir_commit_chunk(page, from, to); 256 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
250 dir_put_page(page); 257 dir_put_page(page);
251 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 258 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
252 mark_inode_dirty(inode); 259 mark_inode_dirty(inode);
@@ -263,12 +270,13 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
263 270
264 if (!page) 271 if (!page)
265 return -ENOMEM; 272 return -ENOMEM;
266 kmap(page); 273 err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE,
267 err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * SYSV_DIRSIZE); 274 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
268 if (err) { 275 if (err) {
269 unlock_page(page); 276 unlock_page(page);
270 goto fail; 277 goto fail;
271 } 278 }
279 kmap(page);
272 280
273 base = (char*)page_address(page); 281 base = (char*)page_address(page);
274 memset(base, 0, PAGE_CACHE_SIZE); 282 memset(base, 0, PAGE_CACHE_SIZE);
@@ -280,9 +288,9 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
280 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino); 288 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino);
281 strcpy(de->name,".."); 289 strcpy(de->name,"..");
282 290
291 kunmap(page);
283 err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE); 292 err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
284fail: 293fail:
285 kunmap(page);
286 page_cache_release(page); 294 page_cache_release(page);
287 return err; 295 return err;
288} 296}
@@ -336,16 +344,18 @@ not_empty:
336void sysv_set_link(struct sysv_dir_entry *de, struct page *page, 344void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
337 struct inode *inode) 345 struct inode *inode)
338{ 346{
339 struct inode *dir = (struct inode*)page->mapping->host; 347 struct address_space *mapping = page->mapping;
340 unsigned from = (char *)de-(char*)page_address(page); 348 struct inode *dir = mapping->host;
341 unsigned to = from + SYSV_DIRSIZE; 349 loff_t pos = page_offset(page) +
350 (char *)de-(char*)page_address(page);
342 int err; 351 int err;
343 352
344 lock_page(page); 353 lock_page(page);
345 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 354 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
355 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
346 BUG_ON(err); 356 BUG_ON(err);
347 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); 357 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
348 err = dir_commit_chunk(page, from, to); 358 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
349 dir_put_page(page); 359 dir_put_page(page);
350 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 360 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
351 mark_inode_dirty(dir); 361 mark_inode_dirty(dir);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f2bcccd1d6fc..f042eec464c2 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -453,23 +453,38 @@ static int sysv_writepage(struct page *page, struct writeback_control *wbc)
453{ 453{
454 return block_write_full_page(page,get_block,wbc); 454 return block_write_full_page(page,get_block,wbc);
455} 455}
456
456static int sysv_readpage(struct file *file, struct page *page) 457static int sysv_readpage(struct file *file, struct page *page)
457{ 458{
458 return block_read_full_page(page,get_block); 459 return block_read_full_page(page,get_block);
459} 460}
460static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 461
462int __sysv_write_begin(struct file *file, struct address_space *mapping,
463 loff_t pos, unsigned len, unsigned flags,
464 struct page **pagep, void **fsdata)
461{ 465{
462 return block_prepare_write(page,from,to,get_block); 466 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
467 get_block);
463} 468}
469
470static int sysv_write_begin(struct file *file, struct address_space *mapping,
471 loff_t pos, unsigned len, unsigned flags,
472 struct page **pagep, void **fsdata)
473{
474 *pagep = NULL;
475 return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
476}
477
464static sector_t sysv_bmap(struct address_space *mapping, sector_t block) 478static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
465{ 479{
466 return generic_block_bmap(mapping,block,get_block); 480 return generic_block_bmap(mapping,block,get_block);
467} 481}
482
468const struct address_space_operations sysv_aops = { 483const struct address_space_operations sysv_aops = {
469 .readpage = sysv_readpage, 484 .readpage = sysv_readpage,
470 .writepage = sysv_writepage, 485 .writepage = sysv_writepage,
471 .sync_page = block_sync_page, 486 .sync_page = block_sync_page,
472 .prepare_write = sysv_prepare_write, 487 .write_begin = sysv_write_begin,
473 .commit_write = generic_commit_write, 488 .write_end = generic_write_end,
474 .bmap = sysv_bmap 489 .bmap = sysv_bmap
475}; 490};
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 5b4fedf17cc4..64c03bdf06a5 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,6 +136,9 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
136 136
137/* itree.c */ 137/* itree.c */
138extern void sysv_truncate(struct inode *); 138extern void sysv_truncate(struct inode *);
139extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
140 loff_t pos, unsigned len, unsigned flags,
141 struct page **pagep, void **fsdata);
139 142
140/* inode.c */ 143/* inode.c */
141extern int sysv_write_inode(struct inode *, int); 144extern int sysv_write_inode(struct inode *, int);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 5d7a4ea27753..7c7a1b39d56c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -76,36 +76,29 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb
76 return 0; 76 return 0;
77} 77}
78 78
79static int udf_adinicb_prepare_write(struct file *file, struct page *page, 79static int udf_adinicb_write_end(struct file *file,
80 unsigned offset, unsigned to) 80 struct address_space *mapping,
81 loff_t pos, unsigned len, unsigned copied,
82 struct page *page, void *fsdata)
81{ 83{
82 kmap(page); 84 struct inode *inode = mapping->host;
83 return 0; 85 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
84} 86 char *kaddr;
85
86static int udf_adinicb_commit_write(struct file *file, struct page *page,
87 unsigned offset, unsigned to)
88{
89 struct inode *inode = page->mapping->host;
90 char *kaddr = page_address(page);
91 87
88 kaddr = kmap_atomic(page, KM_USER0);
92 memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, 89 memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset,
93 kaddr + offset, to - offset); 90 kaddr + offset, copied);
94 mark_inode_dirty(inode); 91 kunmap_atomic(kaddr, KM_USER0);
95 SetPageUptodate(page); 92
96 kunmap(page); 93 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
97 /* only one page here */
98 if (to > inode->i_size)
99 inode->i_size = to;
100 return 0;
101} 94}
102 95
103const struct address_space_operations udf_adinicb_aops = { 96const struct address_space_operations udf_adinicb_aops = {
104 .readpage = udf_adinicb_readpage, 97 .readpage = udf_adinicb_readpage,
105 .writepage = udf_adinicb_writepage, 98 .writepage = udf_adinicb_writepage,
106 .sync_page = block_sync_page, 99 .sync_page = block_sync_page,
107 .prepare_write = udf_adinicb_prepare_write, 100 .write_begin = simple_write_begin,
108 .commit_write = udf_adinicb_commit_write, 101 .write_end = udf_adinicb_write_end,
109}; 102};
110 103
111static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 104static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1652b2c665bb..6ff8151984cf 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -133,10 +133,13 @@ static int udf_readpage(struct file *file, struct page *page)
133 return block_read_full_page(page, udf_get_block); 133 return block_read_full_page(page, udf_get_block);
134} 134}
135 135
136static int udf_prepare_write(struct file *file, struct page *page, 136static int udf_write_begin(struct file *file, struct address_space *mapping,
137 unsigned from, unsigned to) 137 loff_t pos, unsigned len, unsigned flags,
138 struct page **pagep, void **fsdata)
138{ 139{
139 return block_prepare_write(page, from, to, udf_get_block); 140 *pagep = NULL;
141 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
142 udf_get_block);
140} 143}
141 144
142static sector_t udf_bmap(struct address_space *mapping, sector_t block) 145static sector_t udf_bmap(struct address_space *mapping, sector_t block)
@@ -148,8 +151,8 @@ const struct address_space_operations udf_aops = {
148 .readpage = udf_readpage, 151 .readpage = udf_readpage,
149 .writepage = udf_writepage, 152 .writepage = udf_writepage,
150 .sync_page = block_sync_page, 153 .sync_page = block_sync_page,
151 .prepare_write = udf_prepare_write, 154 .write_begin = udf_write_begin,
152 .commit_write = generic_commit_write, 155 .write_end = generic_write_end,
153 .bmap = udf_bmap, 156 .bmap = udf_bmap,
154}; 157};
155 158
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 154452172f43..2410ec6002db 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -19,6 +19,7 @@
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/ufs_fs.h> 21#include <linux/ufs_fs.h>
22#include <linux/swap.h>
22 23
23#include "swab.h" 24#include "swab.h"
24#include "util.h" 25#include "util.h"
@@ -38,12 +39,18 @@ static inline int ufs_match(struct super_block *sb, int len,
38 return !memcmp(name, de->d_name, len); 39 return !memcmp(name, de->d_name, len);
39} 40}
40 41
41static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to) 42static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
42{ 43{
43 struct inode *dir = page->mapping->host; 44 struct address_space *mapping = page->mapping;
45 struct inode *dir = mapping->host;
44 int err = 0; 46 int err = 0;
47
45 dir->i_version++; 48 dir->i_version++;
46 page->mapping->a_ops->commit_write(NULL, page, from, to); 49 block_write_end(NULL, mapping, pos, len, len, page, NULL);
50 if (pos+len > dir->i_size) {
51 i_size_write(dir, pos+len);
52 mark_inode_dirty(dir);
53 }
47 if (IS_DIRSYNC(dir)) 54 if (IS_DIRSYNC(dir))
48 err = write_one_page(page, 1); 55 err = write_one_page(page, 1);
49 else 56 else
@@ -81,16 +88,20 @@ ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry)
81void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, 88void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
82 struct page *page, struct inode *inode) 89 struct page *page, struct inode *inode)
83{ 90{
84 unsigned from = (char *) de - (char *) page_address(page); 91 loff_t pos = page_offset(page) +
85 unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen); 92 (char *) de - (char *) page_address(page);
93 unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen);
86 int err; 94 int err;
87 95
88 lock_page(page); 96 lock_page(page);
89 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 97 err = __ufs_write_begin(NULL, page->mapping, pos, len,
98 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
90 BUG_ON(err); 99 BUG_ON(err);
100
91 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); 101 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
92 ufs_set_de_type(dir->i_sb, de, inode->i_mode); 102 ufs_set_de_type(dir->i_sb, de, inode->i_mode);
93 err = ufs_commit_chunk(page, from, to); 103
104 err = ufs_commit_chunk(page, pos, len);
94 ufs_put_page(page); 105 ufs_put_page(page);
95 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 106 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
96 mark_inode_dirty(dir); 107 mark_inode_dirty(dir);
@@ -312,7 +323,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
312 unsigned long npages = ufs_dir_pages(dir); 323 unsigned long npages = ufs_dir_pages(dir);
313 unsigned long n; 324 unsigned long n;
314 char *kaddr; 325 char *kaddr;
315 unsigned from, to; 326 loff_t pos;
316 int err; 327 int err;
317 328
318 UFSD("ENTER, name %s, namelen %u\n", name, namelen); 329 UFSD("ENTER, name %s, namelen %u\n", name, namelen);
@@ -367,9 +378,10 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
367 return -EINVAL; 378 return -EINVAL;
368 379
369got_it: 380got_it:
370 from = (char*)de - (char*)page_address(page); 381 pos = page_offset(page) +
371 to = from + rec_len; 382 (char*)de - (char*)page_address(page);
372 err = page->mapping->a_ops->prepare_write(NULL, page, from, to); 383 err = __ufs_write_begin(NULL, page->mapping, pos, rec_len,
384 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
373 if (err) 385 if (err)
374 goto out_unlock; 386 goto out_unlock;
375 if (de->d_ino) { 387 if (de->d_ino) {
@@ -386,7 +398,7 @@ got_it:
386 de->d_ino = cpu_to_fs32(sb, inode->i_ino); 398 de->d_ino = cpu_to_fs32(sb, inode->i_ino);
387 ufs_set_de_type(sb, de, inode->i_mode); 399 ufs_set_de_type(sb, de, inode->i_mode);
388 400
389 err = ufs_commit_chunk(page, from, to); 401 err = ufs_commit_chunk(page, pos, rec_len);
390 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 402 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
391 403
392 mark_inode_dirty(dir); 404 mark_inode_dirty(dir);
@@ -509,6 +521,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
509 char *kaddr = page_address(page); 521 char *kaddr = page_address(page);
510 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); 522 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
511 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); 523 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
524 loff_t pos;
512 struct ufs_dir_entry *pde = NULL; 525 struct ufs_dir_entry *pde = NULL;
513 struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from); 526 struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
514 int err; 527 int err;
@@ -532,13 +545,16 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
532 } 545 }
533 if (pde) 546 if (pde)
534 from = (char*)pde - (char*)page_address(page); 547 from = (char*)pde - (char*)page_address(page);
548
549 pos = page_offset(page) + from;
535 lock_page(page); 550 lock_page(page);
536 err = mapping->a_ops->prepare_write(NULL, page, from, to); 551 err = __ufs_write_begin(NULL, mapping, pos, to - from,
552 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
537 BUG_ON(err); 553 BUG_ON(err);
538 if (pde) 554 if (pde)
539 pde->d_reclen = cpu_to_fs16(sb, to-from); 555 pde->d_reclen = cpu_to_fs16(sb, to - from);
540 dir->d_ino = 0; 556 dir->d_ino = 0;
541 err = ufs_commit_chunk(page, from, to); 557 err = ufs_commit_chunk(page, pos, to - from);
542 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 558 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
543 mark_inode_dirty(inode); 559 mark_inode_dirty(inode);
544out: 560out:
@@ -559,14 +575,15 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
559 575
560 if (!page) 576 if (!page)
561 return -ENOMEM; 577 return -ENOMEM;
562 kmap(page); 578
563 err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); 579 err = __ufs_write_begin(NULL, mapping, 0, chunk_size,
580 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
564 if (err) { 581 if (err) {
565 unlock_page(page); 582 unlock_page(page);
566 goto fail; 583 goto fail;
567 } 584 }
568 585
569 586 kmap(page);
570 base = (char*)page_address(page); 587 base = (char*)page_address(page);
571 memset(base, 0, PAGE_CACHE_SIZE); 588 memset(base, 0, PAGE_CACHE_SIZE);
572 589
@@ -584,10 +601,10 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
584 de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1)); 601 de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
585 ufs_set_de_namlen(sb, de, 2); 602 ufs_set_de_namlen(sb, de, 2);
586 strcpy (de->d_name, ".."); 603 strcpy (de->d_name, "..");
604 kunmap(page);
587 605
588 err = ufs_commit_chunk(page, 0, chunk_size); 606 err = ufs_commit_chunk(page, 0, chunk_size);
589fail: 607fail:
590 kunmap(page);
591 page_cache_release(page); 608 page_cache_release(page);
592 return err; 609 return err;
593} 610}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index f18b79122fa3..d84d4b0f4779 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,24 +558,39 @@ static int ufs_writepage(struct page *page, struct writeback_control *wbc)
558{ 558{
559 return block_write_full_page(page,ufs_getfrag_block,wbc); 559 return block_write_full_page(page,ufs_getfrag_block,wbc);
560} 560}
561
561static int ufs_readpage(struct file *file, struct page *page) 562static int ufs_readpage(struct file *file, struct page *page)
562{ 563{
563 return block_read_full_page(page,ufs_getfrag_block); 564 return block_read_full_page(page,ufs_getfrag_block);
564} 565}
565static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) 566
567int __ufs_write_begin(struct file *file, struct address_space *mapping,
568 loff_t pos, unsigned len, unsigned flags,
569 struct page **pagep, void **fsdata)
566{ 570{
567 return block_prepare_write(page,from,to,ufs_getfrag_block); 571 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
572 ufs_getfrag_block);
568} 573}
574
575static int ufs_write_begin(struct file *file, struct address_space *mapping,
576 loff_t pos, unsigned len, unsigned flags,
577 struct page **pagep, void **fsdata)
578{
579 *pagep = NULL;
580 return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
581}
582
569static sector_t ufs_bmap(struct address_space *mapping, sector_t block) 583static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
570{ 584{
571 return generic_block_bmap(mapping,block,ufs_getfrag_block); 585 return generic_block_bmap(mapping,block,ufs_getfrag_block);
572} 586}
587
573const struct address_space_operations ufs_aops = { 588const struct address_space_operations ufs_aops = {
574 .readpage = ufs_readpage, 589 .readpage = ufs_readpage,
575 .writepage = ufs_writepage, 590 .writepage = ufs_writepage,
576 .sync_page = block_sync_page, 591 .sync_page = block_sync_page,
577 .prepare_write = ufs_prepare_write, 592 .write_begin = ufs_write_begin,
578 .commit_write = generic_commit_write, 593 .write_end = generic_write_end,
579 .bmap = ufs_bmap 594 .bmap = ufs_bmap
580}; 595};
581 596
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 06d344839c42..79a340a1909e 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -231,6 +231,9 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
231 231
232extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); 232extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
233extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); 233extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
234extern int __ufs_write_begin(struct file *file, struct address_space *mapping,
235 loff_t pos, unsigned len, unsigned flags,
236 struct page **pagep, void **fsdata);
234 237
235/* 238/*
236 * These functions manipulate ufs buffers 239 * These functions manipulate ufs buffers
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6f4c29e9c3d9..354d68a32d4a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1508,13 +1508,18 @@ xfs_vm_direct_IO(
1508} 1508}
1509 1509
1510STATIC int 1510STATIC int
1511xfs_vm_prepare_write( 1511xfs_vm_write_begin(
1512 struct file *file, 1512 struct file *file,
1513 struct page *page, 1513 struct address_space *mapping,
1514 unsigned int from, 1514 loff_t pos,
1515 unsigned int to) 1515 unsigned len,
1516 unsigned flags,
1517 struct page **pagep,
1518 void **fsdata)
1516{ 1519{
1517 return block_prepare_write(page, from, to, xfs_get_blocks); 1520 *pagep = NULL;
1521 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1522 xfs_get_blocks);
1518} 1523}
1519 1524
1520STATIC sector_t 1525STATIC sector_t
@@ -1568,8 +1573,8 @@ const struct address_space_operations xfs_address_space_operations = {
1568 .sync_page = block_sync_page, 1573 .sync_page = block_sync_page,
1569 .releasepage = xfs_vm_releasepage, 1574 .releasepage = xfs_vm_releasepage,
1570 .invalidatepage = xfs_vm_invalidatepage, 1575 .invalidatepage = xfs_vm_invalidatepage,
1571 .prepare_write = xfs_vm_prepare_write, 1576 .write_begin = xfs_vm_write_begin,
1572 .commit_write = generic_commit_write, 1577 .write_end = generic_write_end,
1573 .bmap = xfs_vm_bmap, 1578 .bmap = xfs_vm_bmap,
1574 .direct_IO = xfs_vm_direct_IO, 1579 .direct_IO = xfs_vm_direct_IO,
1575 .migratepage = buffer_migrate_page, 1580 .migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 765ec16a6e39..7e7aeb4c8a08 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -134,45 +134,34 @@ xfs_iozero(
134 loff_t pos, /* offset in file */ 134 loff_t pos, /* offset in file */
135 size_t count) /* size of data to zero */ 135 size_t count) /* size of data to zero */
136{ 136{
137 unsigned bytes;
138 struct page *page; 137 struct page *page;
139 struct address_space *mapping; 138 struct address_space *mapping;
140 int status; 139 int status;
141 140
142 mapping = ip->i_mapping; 141 mapping = ip->i_mapping;
143 do { 142 do {
144 unsigned long index, offset; 143 unsigned offset, bytes;
144 void *fsdata;
145 145
146 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 146 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
147 index = pos >> PAGE_CACHE_SHIFT;
148 bytes = PAGE_CACHE_SIZE - offset; 147 bytes = PAGE_CACHE_SIZE - offset;
149 if (bytes > count) 148 if (bytes > count)
150 bytes = count; 149 bytes = count;
151 150
152 status = -ENOMEM; 151 status = pagecache_write_begin(NULL, mapping, pos, bytes,
153 page = grab_cache_page(mapping, index); 152 AOP_FLAG_UNINTERRUPTIBLE,
154 if (!page) 153 &page, &fsdata);
155 break;
156
157 status = mapping->a_ops->prepare_write(NULL, page, offset,
158 offset + bytes);
159 if (status) 154 if (status)
160 goto unlock; 155 break;
161 156
162 zero_user_page(page, offset, bytes, KM_USER0); 157 zero_user_page(page, offset, bytes, KM_USER0);
163 158
164 status = mapping->a_ops->commit_write(NULL, page, offset, 159 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
165 offset + bytes); 160 page, fsdata);
166 if (!status) { 161 WARN_ON(status <= 0); /* can't return less than zero! */
167 pos += bytes; 162 pos += bytes;
168 count -= bytes; 163 count -= bytes;
169 } 164 status = 0;
170
171unlock:
172 unlock_page(page);
173 page_cache_release(page);
174 if (status)
175 break;
176 } while (count); 165 } while (count);
177 166
178 return (-status); 167 return (-status);