From 7c96b7a146eb81d13be738709c36752c20abe4fc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 May 2007 08:36:21 +0100 Subject: [JFFS2] Remove dead file histo_mips.h Its contents were subsumed into compr_rubin.c in a previous commit, but I forgot to git-rm it. Signed-off-by: David Woodhouse --- fs/jffs2/histo_mips.h | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 fs/jffs2/histo_mips.h (limited to 'fs') diff --git a/fs/jffs2/histo_mips.h b/fs/jffs2/histo_mips.h deleted file mode 100644 index fa3dac19a109..000000000000 --- a/fs/jffs2/histo_mips.h +++ /dev/null @@ -1,2 +0,0 @@ -#define BIT_DIVIDER_MIPS 1043 -static int bits_mips[8] = { 277,249,290,267,229,341,212,241}; /* mips32 */ -- cgit v1.2.2 From 3fddb6c985e3823c991399840d2d5ef5940e1b60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 5 May 2007 09:52:49 +0100 Subject: [JFFS2] Don't advance c->wbuf_ofs to next eraseblock after wbuf flush After flushing the last page of an eraseblock, don't leave the wbuf 'offset' field pointing at the start of the next physical eraseblock. This was causing a BUG() on NOR-ECC (Sibley) flash, where we start writing a little further in, after the cleanmarker. Debugged by Alexander Belyakov Signed-off-by: David Woodhouse --- fs/jffs2/wbuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index c556e85a565c..91d1d0f1c66c 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -637,7 +637,10 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) memset(c->wbuf,0xff,c->wbuf_pagesize); /* adjust write buffer offset, else we get a non contiguous write bug */ - c->wbuf_ofs += c->wbuf_pagesize; + if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) + c->wbuf_ofs += c->wbuf_pagesize; + else + c->wbuf_ofs = 0xffffffff; c->wbuf_len = 0; return 0; } -- cgit v1.2.2 From 1123e2a85941c7f506bd42c91c7e9ab74fc42d79 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 5 May 2007 16:29:34 +0100 Subject: [JFFS2] Remember to calculate overlap on nodes which replace older nodes This fixes a problem Artem found with the integck test tool -- we weren't correctly keeping track of the 'overlap' flag in some cases, which led to the nodes being played back in an incorrect order and file corruption. Signed-off-by: David Woodhouse --- fs/jffs2/readinode.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 6aff38930b50..b0645ac7769a 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -276,10 +276,9 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, /* Who cares if the new one is good; keep it for now anyway. */ rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); /* Same overlapping from in front and behind */ - tn->overlapped = this->overlapped; jffs2_kill_tn(c, this); dbg_readinode("Like new node. Throw away old\n"); - return 0; + goto calc_overlaps; } } if (this->version < tn->version && @@ -293,7 +292,6 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, } /* ... and is good. Kill 'this'... */ rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); - tn->overlapped = this->overlapped; jffs2_kill_tn(c, this); /* ... and any subsequent nodes which are also overlapped */ this = tn_next(tn); @@ -309,7 +307,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, this = next; } dbg_readinode("Done inserting new\n"); - return 0; + goto calc_overlaps; } if (this->version > tn->version && this->fn->ofs <= tn->fn->ofs && @@ -321,6 +319,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } /* ... but 'this' was bad. Replace it... */ + tn->overlapped = this->overlapped; rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); dbg_readinode("Bad CRC on old overlapping node. Kill it\n"); jffs2_kill_tn(c, this); @@ -359,6 +358,8 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, rb_link_node(&tn->rb, &insert_point->rb, link); rb_insert_color(&tn->rb, &rii->tn_root); } + + calc_overlaps: /* If there's anything behind that overlaps us, note it */ this = tn_prev(tn); if (this) { @@ -483,7 +484,7 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, vers_next = tn_prev(this); eat_last(&ver_root, &this->rb); if (check_tn_node(c, this)) { - dbg_readinode("node ver %x, 0x%x-0x%x failed CRC\n", + dbg_readinode("node ver %d, 0x%x-0x%x failed CRC\n", this->version, this->fn->ofs, this->fn->ofs+this->fn->size); jffs2_kill_tn(c, this); @@ -496,7 +497,7 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, high_ver = this->version; rii->latest_ref = this->fn->raw; } - dbg_readinode("Add %p (v %x, 0x%x-0x%x, ov %d) to fragtree\n", + dbg_readinode("Add %p (v %d, 0x%x-0x%x, ov %d) to fragtree\n", this, this->version, this->fn->ofs, this->fn->ofs+this->fn->size, this->overlapped); @@ -850,7 +851,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref return ret; } #ifdef JFFS2_DBG_READINODE_MESSAGES - dbg_readinode("After adding ver %d:\n", tn->version); + dbg_readinode("After adding ver %d:\n", je32_to_cpu(rd->version)); tn = tn_first(&rii->tn_root); while (tn) { dbg_readinode("%p: v %d r 0x%x-0x%x ov %d\n", -- cgit v1.2.2 From 96dd8d25d1ca8c233bd47752349d27a631c18719 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 6 May 2007 14:41:40 +0100 Subject: [JFFS2] Remove broken insert_point optimisation in jffs2_add_tn_to_tree() The original code would remember, during the first pass over the tree, a suitable place to start the insertion from when we eventually come to add a new node. The optimisation was broken, and we sometimes ended up inserting a new node in the wrong place because we started the insertion from the wrong point. Just ditch the optimisation and start the insertion from the root of the tree, for now. I'll try it again when I'm feeling cleverer. Signed-off-by: David Woodhouse --- fs/jffs2/readinode.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index b0645ac7769a..f461604cf01f 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -219,7 +219,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) { uint32_t fn_end = tn->fn->ofs + tn->fn->size; - struct jffs2_tmp_dnode_info *insert_point = NULL, *this; + struct jffs2_tmp_dnode_info *this; dbg_readinode("insert fragment %#04x-%#04x, ver %u\n", tn->fn->ofs, fn_end, tn->version); @@ -248,9 +248,6 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } - /* If we add a new node it'll be somewhere under here. */ - insert_point = this; - /* If the node is coincident with another at a lower address, back up until the other node is found. It may be relevant */ while (tn->overlapped) @@ -325,24 +322,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, jffs2_kill_tn(c, this); return 0; } - /* We want to be inserted under the last node which is - either at a lower offset _or_ has a smaller range */ - if (this->fn->ofs < tn->fn->ofs || - (this->fn->ofs == tn->fn->ofs && - this->fn->size <= tn->fn->size)) - insert_point = this; this = tn_next(this); } - dbg_readinode("insert_point %p, ver %d, 0x%x-0x%x, ov %d\n", - insert_point, insert_point->version, insert_point->fn->ofs, - insert_point->fn->ofs+insert_point->fn->size, - insert_point->overlapped); + /* We neither completely obsoleted nor were completely - obsoleted by an earlier node. Insert under insert_point */ + obsoleted by an earlier node. Insert into the tree */ { - struct rb_node *parent = &insert_point->rb; - struct rb_node **link = &parent; + struct rb_node *parent; + struct rb_node **link = &rii->tn_root.rb_node; + struct jffs2_tmp_dnode_info *insert_point; while (*link) { parent = *link; @@ -458,7 +447,7 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, this = tn_last(&rii->tn_root); while (this) { dbg_readinode("tn %p ver %d range 0x%x-0x%x ov %d\n", this, this->version, this->fn->ofs, - this->fn->ofs+this->fn->size, this->overlapped); + this->fn->ofs+this->fn->size, this->overlapped); this = tn_prev(this); } #endif -- cgit v1.2.2 From fcf3cafb3e7e7750f4876571492594c3a4854ee5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 7 May 2007 13:16:13 +0100 Subject: [JFFS2] Remove another bogus optimisation in jffs2_add_tn_to_tree() We attempted to insert new nodes into the tree by just using rb_replace_node to let them replace an earlier node which they completely overlapped. However, that could place the new node into the wrong place in the tree, since its start could be node only before the start of the victim, but before the node _before_ the victim in the tree (if that previous node actually ends _after_ the new node, thus isn't entirely overlapped and wasn't itself chosen to be the victim). Signed-off-by: David Woodhouse --- fs/jffs2/readinode.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index f461604cf01f..bd31d4956c6d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -221,7 +221,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, uint32_t fn_end = tn->fn->ofs + tn->fn->size; struct jffs2_tmp_dnode_info *this; - dbg_readinode("insert fragment %#04x-%#04x, ver %u\n", tn->fn->ofs, fn_end, tn->version); + dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); /* If a node has zero dsize, we only have to keep if it if it might be the node with highest version -- i.e. the one which will end up as f->metadata. @@ -271,11 +271,11 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } else { /* Who cares if the new one is good; keep it for now anyway. */ + dbg_readinode("Like new node. Throw away old\n"); rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); - /* Same overlapping from in front and behind */ jffs2_kill_tn(c, this); - dbg_readinode("Like new node. Throw away old\n"); - goto calc_overlaps; + /* Same overlapping from in front and behind */ + return 0; } } if (this->version < tn->version && @@ -287,11 +287,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, jffs2_kill_tn(c, tn); return 0; } - /* ... and is good. Kill 'this'... */ - rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); - jffs2_kill_tn(c, this); - /* ... and any subsequent nodes which are also overlapped */ - this = tn_next(tn); + /* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */ while (this && this->fn->ofs + this->fn->size < fn_end) { struct jffs2_tmp_dnode_info *next = tn_next(this); if (this->version < tn->version) { @@ -303,8 +299,8 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, } this = next; } - dbg_readinode("Done inserting new\n"); - goto calc_overlaps; + dbg_readinode("Done killing overlapped nodes\n"); + break; } if (this->version > tn->version && this->fn->ofs <= tn->fn->ofs && @@ -316,11 +312,10 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } /* ... but 'this' was bad. Replace it... */ - tn->overlapped = this->overlapped; - rb_replace_node(&this->rb, &tn->rb, &rii->tn_root); dbg_readinode("Bad CRC on old overlapping node. Kill it\n"); + tn_erase(this, &rii->tn_root); jffs2_kill_tn(c, this); - return 0; + break; } this = tn_next(this); -- cgit v1.2.2 From 1c97964520503c60aa91680d4b525236c2885a36 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 8 May 2007 00:19:54 +0100 Subject: [JFFS2] Simplify and clean up jffs2_add_tn_to_tree() some more. Fixing at least a couple more bugs in the process. Signed-off-by: David Woodhouse --- fs/jffs2/readinode.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index bd31d4956c6d..4884d5edfe65 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -240,20 +240,16 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, /* Find the earliest node which _may_ be relevant to this one */ this = jffs2_lookup_tn(&rii->tn_root, tn->fn->ofs); - if (!this) { - /* First addition to empty tree. $DEITY how I love the easy cases */ - rb_link_node(&tn->rb, NULL, &rii->tn_root.rb_node); - rb_insert_color(&tn->rb, &rii->tn_root); - dbg_readinode("keep new frag\n"); - return 0; - } - - /* If the node is coincident with another at a lower address, - back up until the other node is found. It may be relevant */ - while (tn->overlapped) - tn = tn_prev(tn); + if (this) { + /* If the node is coincident with another at a lower address, + back up until the other node is found. It may be relevant */ + while (this->overlapped) + this = tn_prev(this); - dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); + /* First node should never be marked overlapped */ + BUG_ON(!this); + dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); + } while (this) { if (this->fn->ofs > fn_end) @@ -288,7 +284,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, return 0; } /* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */ - while (this && this->fn->ofs + this->fn->size < fn_end) { + while (this && this->fn->ofs + this->fn->size <= fn_end) { struct jffs2_tmp_dnode_info *next = tn_next(this); if (this->version < tn->version) { tn_erase(this, &rii->tn_root); @@ -300,7 +296,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, this = next; } dbg_readinode("Done killing overlapped nodes\n"); - break; + continue; } if (this->version > tn->version && this->fn->ofs <= tn->fn->ofs && @@ -326,7 +322,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, { struct rb_node *parent; struct rb_node **link = &rii->tn_root.rb_node; - struct jffs2_tmp_dnode_info *insert_point; + struct jffs2_tmp_dnode_info *insert_point = NULL; while (*link) { parent = *link; @@ -343,7 +339,6 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, rb_insert_color(&tn->rb, &rii->tn_root); } - calc_overlaps: /* If there's anything behind that overlaps us, note it */ this = tn_prev(tn); if (this) { -- cgit v1.2.2 From 0f8952c2fa981c75dbbb363ac83b320068fffa69 Mon Sep 17 00:00:00 2001 From: Ronni Nielsen Date: Wed, 9 May 2007 06:44:57 +0200 Subject: fs/libfs.c: >80 columns line break fix Signed-off-by: Ronni Nielsen Signed-off-by: Adrian Bunk --- fs/libfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index 1247ee90253a..5294de1f40c4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -159,7 +159,10 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) continue; spin_unlock(&dcache_lock); - if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) + if (filldir(dirent, next->d_name.name, + next->d_name.len, filp->f_pos, + next->d_inode->i_ino, + dt_type(next->d_inode)) < 0) return 0; spin_lock(&dcache_lock); /* next is still alive */ -- cgit v1.2.2 From ccf6780dc3d228f380e17b6858b93fc48e40afd4 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 9 May 2007 07:10:02 +0200 Subject: Style fix in fs/select.c Signed-off-by: WANG Cong Signed-off-by: Adrian Bunk --- fs/select.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index d86224154dec..a974082b0824 100644 --- a/fs/select.c +++ b/fs/select.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) { - remove_wait_queue(entry->wait_address,&entry->wait); + remove_wait_queue(entry->wait_address, &entry->wait); fput(entry->filp); } @@ -128,7 +128,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(&entry->wait, current); - add_wait_queue(wait_address,&entry->wait); + add_wait_queue(wait_address, &entry->wait); } #define FDS_IN(fds, n) (fds->in + n) -- cgit v1.2.2 From beb7dd86a101263bf63a78c7c6d4da3849b35bd6 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 9 May 2007 07:14:03 +0200 Subject: Fix misspellings collected by members of KJ list. Fix the misspellings of "propogate", "writting" and (oh, the shame :-) "kenrel" in the source tree. Signed-off-by: Robert P. J. Day Signed-off-by: Adrian Bunk --- fs/direct-io.c | 2 +- fs/reiserfs/journal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index d9d0833444f5..1e88d8d1d2a9 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -439,7 +439,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) * Wait on and process all in-flight BIOs. This must only be called once * all bios have been issued so that the refcount can only decrease. * This just waits for all bios to make it through dio_bio_complete. IO - * errors are propogated through dio->io_error and should be propogated via + * errors are propagated through dio->io_error and should be propagated via * dio_complete(). */ static void dio_await_completion(struct dio *dio) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e073fd86cf60..f25086aeef5f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1110,7 +1110,7 @@ static int flush_commit_list(struct super_block *s, if (!barrier) { /* If there was a write error in the journal - we can't commit * this transaction - it will be invalid and, if successful, - * will just end up propogating the write error out to + * will just end up propagating the write error out to * the file system. */ if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { if (buffer_dirty(jl->j_commit_bh)) @@ -1125,7 +1125,7 @@ static int flush_commit_list(struct super_block *s, /* If there was a write error in the journal - we can't commit this * transaction - it will be invalid and, if successful, will just end - * up propogating the write error out to the filesystem. */ + * up propagating the write error out to the filesystem. */ if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { #ifdef CONFIG_REISERFS_CHECK reiserfs_warning(s, "journal-615: buffer write failed"); -- cgit v1.2.2 From 59c51591a0ac7568824f541f57de967e88adaa07 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Wed, 9 May 2007 08:57:56 +0200 Subject: Fix occurrences of "the the " Signed-off-by: Michael Opdenacker Signed-off-by: Adrian Bunk --- fs/jfs/jfs_dmap.c | 2 +- fs/jfs/jfs_imap.c | 4 ++-- fs/jfs/jfs_logmgr.c | 2 +- fs/xfs/xfs_itable.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 82b0544bd76d..f3b1ebb22280 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1507,7 +1507,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) if (l2nb < budmin) { /* search the lower level dmap control pages to get - * the starting block number of the the dmap that + * the starting block number of the dmap that * contains or starts off the free space. */ if ((rc = diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index c465607be991..c6530227cda6 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -386,7 +386,7 @@ int diRead(struct inode *ip) return -EIO; } - /* locate the the disk inode requested */ + /* locate the disk inode requested */ dp = (struct dinode *) mp->data; dp += rel_inode; @@ -1407,7 +1407,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) inum = pip->i_ino + 1; ino = inum & (INOSPERIAG - 1); - /* back off the the hint if it is outside of the iag */ + /* back off the hint if it is outside of the iag */ if (ino == 0) inum = pip->i_ino; diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 6a3f00dc8c83..44a2f33cb98d 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1960,7 +1960,7 @@ static void lbmfree(struct lbuf * bp) /* * NAME: lbmRedrive * - * FUNCTION: add a log buffer to the the log redrive list + * FUNCTION: add a log buffer to the log redrive list * * PARAMETER: * bp - log buffer diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7775ddc0b3c6..e725ddd3de5f 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -809,7 +809,7 @@ xfs_inumbers( xfs_buf_relse(agbp); agbp = NULL; /* - * Move up the the last inode in the current + * Move up the last inode in the current * chunk. The lookup_ge will always get * us the first inode in the next chunk. */ -- cgit v1.2.2 From 148e423f909e14564d8af13239c5d060f7df273e Mon Sep 17 00:00:00 2001 From: "Alexander E. Patrakov" Date: Wed, 9 May 2007 07:31:54 +0200 Subject: Remove obsolete fat_cvf help text The text removed by the following patch refers to functionality that never worked, to non-existing documentation file, and to mount options marked as obsolete in the module. Signed-off-by: Alexander E. Patrakov Signed-off-by: Adrian Bunk --- fs/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 4622dabb2253..0fa0c1193e81 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -724,10 +724,6 @@ config FAT_FS file system and use GNU tar's M option. GNU tar is a program available for Unix and DOS ("man tar" or "info tar"). - It is now also becoming possible to read and write compressed FAT - file systems; read for - details. - The FAT support will enlarge your kernel by about 37 KB. If unsure, say Y. -- cgit v1.2.2 From 5886269962f94fa9185c32db3ec936c612503235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 9 May 2007 07:51:49 +0200 Subject: fix file specification in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many files include the filename at the beginning, serveral used a wrong one. Signed-off-by: Uwe Kleine-König Signed-off-by: Adrian Bunk --- fs/jbd/checkpoint.c | 2 +- fs/jbd/recovery.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd/transaction.c | 2 +- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/recovery.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/jbd2/transaction.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0208cc7ac5d0..47552d4a6324 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -1,5 +1,5 @@ /* - * linux/fs/checkpoint.c + * linux/fs/jbd/checkpoint.c * * Written by Stephen C. Tweedie , 1999 * diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 11563fe2a52b..2a5f4b833e35 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -1,5 +1,5 @@ /* - * linux/fs/recovery.c + * linux/fs/jbd/recovery.c * * Written by Stephen C. Tweedie , 1999 * diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index a68cbb605022..824e3b7d4ec1 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -1,5 +1,5 @@ /* - * linux/fs/revoke.c + * linux/fs/jbd/revoke.c * * Written by Stephen C. Tweedie , 2000 * diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index f9822fc07851..772b6531a2a2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1,5 +1,5 @@ /* - * linux/fs/transaction.c + * linux/fs/jbd/transaction.c * * Written by Stephen C. Tweedie , 1998 * diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 68039fa9a566..3fccde7ba008 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -1,5 +1,5 @@ /* - * linux/fs/checkpoint.c + * linux/fs/jbd2/checkpoint.c * * Written by Stephen C. Tweedie , 1999 * diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9f10acafaf70..395c92a04ac9 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -1,5 +1,5 @@ /* - * linux/fs/recovery.c + * linux/fs/jbd2/recovery.c * * Written by Stephen C. Tweedie , 1999 * diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 1e864dcc49ea..9246e763da78 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -1,5 +1,5 @@ /* - * linux/fs/revoke.c + * linux/fs/jbd2/revoke.c * * Written by Stephen C. Tweedie , 2000 * diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e347d8c078bc..7946ff43fc40 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1,5 +1,5 @@ /* - * linux/fs/transaction.c + * linux/fs/jbd2/transaction.c * * Written by Stephen C. Tweedie , 1998 * -- cgit v1.2.2 From 92f4c701aad794de9e4cf7341d0a486aed027c46 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 9 May 2007 02:33:32 -0700 Subject: use simple_read_from_buffer() in fs/ Cleanup using simple_read_from_buffer() in binfmt_misc, configfs, and sysfs. Cc: Greg Kroah-Hartman Cc: Joel Becker Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 13 +------------ fs/configfs/file.c | 33 ++------------------------------- fs/sysfs/file.c | 33 ++------------------------------- 3 files changed, 5 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 18657f001b43..72d0b412c376 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -675,19 +675,8 @@ static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { char *s = enabled ? "enabled" : "disabled"; - int len = strlen(s); - loff_t pos = *ppos; - if (pos < 0) - return -EINVAL; - if (pos >= len) - return 0; - if (len < pos + nbytes) - nbytes = len - pos; - if (copy_to_user(buf, s + pos, nbytes)) - return -EFAULT; - *ppos = pos + nbytes; - return nbytes; + return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } static ssize_t bm_status_write(struct file * file, const char __user * buffer, diff --git a/fs/configfs/file.c b/fs/configfs/file.c index d98be5e01328..3527c7c6def8 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -77,36 +77,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf return ret; } - -/** - * flush_read_buffer - push buffer to userspace. - * @buffer: data buffer for file. - * @userbuf: user-passed buffer. - * @count: number of bytes requested. - * @ppos: file position. - * - * Copy the buffer we filled in fill_read_buffer() to userspace. - * This is done at the reader's leisure, copying and advancing - * the amount they specify each time. - * This may be called continuously until the buffer is empty. - */ -static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf, - size_t count, loff_t * ppos) -{ - int error; - - if (*ppos > buffer->count) - return 0; - - if (count > (buffer->count - *ppos)) - count = buffer->count - *ppos; - - error = copy_to_user(buf,buffer->page + *ppos,count); - if (!error) - *ppos += count; - return error ? -EFAULT : count; -} - /** * configfs_read_file - read an attribute. * @file: file pointer. @@ -139,7 +109,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", __FUNCTION__, count, *ppos, buffer->page); - retval = flush_read_buffer(buffer,buf,count,ppos); + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); out: up(&buffer->sem); return retval; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 0e637adc2b87..b502c7197ec0 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -111,36 +111,6 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer return ret; } - -/** - * flush_read_buffer - push buffer to userspace. - * @buffer: data buffer for file. - * @buf: user-passed buffer. - * @count: number of bytes requested. - * @ppos: file position. - * - * Copy the buffer we filled in fill_read_buffer() to userspace. - * This is done at the reader's leisure, copying and advancing - * the amount they specify each time. - * This may be called continuously until the buffer is empty. - */ -static int flush_read_buffer(struct sysfs_buffer * buffer, char __user * buf, - size_t count, loff_t * ppos) -{ - int error; - - if (*ppos > buffer->count) - return 0; - - if (count > (buffer->count - *ppos)) - count = buffer->count - *ppos; - - error = copy_to_user(buf,buffer->page + *ppos,count); - if (!error) - *ppos += count; - return error ? -EFAULT : count; -} - /** * sysfs_read_file - read an attribute. * @file: file pointer. @@ -177,7 +147,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", __FUNCTION__, count, *ppos, buffer->page); - retval = flush_read_buffer(buffer,buf,count,ppos); + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); out: up(&buffer->sem); return retval; -- cgit v1.2.2 From 62ce39c531860aee6c12128c629b0a82f656a306 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Wed, 9 May 2007 02:33:41 -0700 Subject: fs: fix indentation in do_path_lookup Signed-off-by: Josef 'Jeff' Sipek Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 856b2f5da51d..19f178ec5744 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1155,11 +1155,9 @@ static int fastcall do_path_lookup(int dfd, const char *name, current->total_link_count = 0; retval = link_path_walk(name, nd); out: - if (likely(retval == 0)) { - if (unlikely(!audit_dummy_context() && nd && nd->dentry && + if (unlikely(!retval && !audit_dummy_context() && nd->dentry && nd->dentry->d_inode)) audit_inode(name, nd->dentry->d_inode); - } out_fail: return retval; -- cgit v1.2.2 From 2dfdd266b9a2f2d93a3fdbee89969f6ea9ec5377 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Wed, 9 May 2007 02:33:41 -0700 Subject: fs: use path_walk in do_path_lookup Since path_walk sets the total_link_count to 0 and calls link_path_walk, we can just call path_walk directly. Signed-off-by: Josef 'Jeff' Sipek Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 19f178ec5744..b3780e3fc88e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1152,8 +1152,8 @@ static int fastcall do_path_lookup(int dfd, const char *name, fput_light(file, fput_needed); } - current->total_link_count = 0; - retval = link_path_walk(name, nd); + + retval = path_walk(name, nd); out: if (unlikely(!retval && !audit_dummy_context() && nd->dentry && nd->dentry->d_inode)) -- cgit v1.2.2 From 416351f28d2b31d15ff73e9aff699b2163704c95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 9 May 2007 02:33:45 -0700 Subject: AFS: AFS fixups Make some miscellaneous changes to the AFS filesystem: (1) Assert RCU barriers on module exit to make sure RCU has finished with callbacks in this module. (2) Correctly handle the AFS server returning a zero-length read. (3) Split out data zapping calls into one function (afs_zap_data). (4) Rename some afs_file_*() functions to afs_*() where they apply to non-regular files too. (5) Be consistent about the presentation of volume ID:vnode ID in debugging output. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/callback.c | 9 +++---- fs/afs/dir.c | 18 ++++++------- fs/afs/file.c | 75 +++++++++++++++++++------------------------------------ fs/afs/fsclient.c | 37 ++++++++++++++------------- fs/afs/inode.c | 25 +++++++++++++------ fs/afs/internal.h | 29 +++++++++++++++------ fs/afs/main.c | 2 ++ fs/afs/misc.c | 1 + fs/afs/mntpt.c | 5 ++-- fs/afs/security.c | 12 +++++---- fs/afs/server.c | 3 +++ fs/afs/vnode.c | 14 +++++------ 12 files changed, 121 insertions(+), 109 deletions(-) (limited to 'fs') diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 9bdbf36a9aa9..f64e40fefc02 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -44,7 +44,7 @@ void afs_init_callback_state(struct afs_server *server) while (!RB_EMPTY_ROOT(&server->cb_promises)) { vnode = rb_entry(server->cb_promises.rb_node, struct afs_vnode, cb_promise); - _debug("UNPROMISE { vid=%x vn=%u uq=%u}", + _debug("UNPROMISE { vid=%x:%u uq=%u}", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); rb_erase(&vnode->cb_promise, &server->cb_promises); vnode->cb_promised = false; @@ -84,11 +84,8 @@ void afs_broken_callback_work(struct work_struct *work) /* if the vnode's data version number changed then its contents * are different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - _debug("zap data {%x:%u}", - vnode->fid.vid, vnode->fid.vnode); - invalidate_remote_inode(&vnode->vfs_inode); - } + if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + afs_zap_data(vnode); } out: diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0c1e902f17a3..51f177af5dd6 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -55,7 +55,7 @@ const struct inode_operations afs_dir_inode_operations = { .rmdir = afs_rmdir, .rename = afs_rename, .permission = afs_permission, - .getattr = afs_inode_getattr, + .getattr = afs_getattr, }; static struct dentry_operations afs_fs_dentry_operations = { @@ -491,7 +491,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, vnode = AFS_FS_I(dir); - _enter("{%x:%d},%p{%s},", + _enter("{%x:%u},%p{%s},", vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); ASSERTCMP(dentry->d_inode, ==, NULL); @@ -731,7 +731,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode) dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s},%o", + _enter("{%x:%u},{%s},%o", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); ret = -ENAMETOOLONG; @@ -796,7 +796,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s}", + _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); ret = -ENAMETOOLONG; @@ -842,7 +842,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s}", + _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); ret = -ENAMETOOLONG; @@ -916,7 +916,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode, dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s},%o,", + _enter("{%x:%u},{%s},%o,", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); ret = -ENAMETOOLONG; @@ -983,7 +983,7 @@ static int afs_link(struct dentry *from, struct inode *dir, vnode = AFS_FS_I(from->d_inode); dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%x:%d},{%s}", + _enter("{%x:%u},{%x:%u},{%s}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); @@ -1032,7 +1032,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, dvnode = AFS_FS_I(dir); - _enter("{%x:%d},{%s},%s", + _enter("{%x:%u},{%s},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, content); @@ -1104,7 +1104,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); - _enter("{%x:%d},{%x:%d},{%x:%d},{%s}", + _enter("{%x:%u},{%x:%u},{%x:%u},{%s}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, vnode->fid.vid, vnode->fid.vnode, new_dvnode->fid.vid, new_dvnode->fid.vnode, diff --git a/fs/afs/file.c b/fs/afs/file.c index ae256498f4f7..3eb3fc7b36be 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -17,9 +17,9 @@ #include #include "internal.h" -static int afs_file_readpage(struct file *file, struct page *page); -static void afs_file_invalidatepage(struct page *page, unsigned long offset); -static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_readpage(struct file *file, struct page *page); +static void afs_invalidatepage(struct page *page, unsigned long offset); +static int afs_releasepage(struct page *page, gfp_t gfp_flags); const struct file_operations afs_file_operations = { .open = afs_open, @@ -32,15 +32,15 @@ const struct file_operations afs_file_operations = { }; const struct inode_operations afs_file_inode_operations = { - .getattr = afs_inode_getattr, + .getattr = afs_getattr, .permission = afs_permission, }; const struct address_space_operations afs_fs_aops = { - .readpage = afs_file_readpage, + .readpage = afs_readpage, .set_page_dirty = __set_page_dirty_nobuffers, - .releasepage = afs_file_releasepage, - .invalidatepage = afs_file_invalidatepage, + .releasepage = afs_releasepage, + .invalidatepage = afs_invalidatepage, }; /* @@ -52,7 +52,7 @@ int afs_open(struct inode *inode, struct file *file) struct key *key; int ret; - _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { @@ -78,7 +78,7 @@ int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); key_put(file->private_data); _leave(" = 0"); @@ -89,10 +89,10 @@ int afs_release(struct inode *inode, struct file *file) * deal with notification that a page was read from the cache */ #ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_read_complete(void *cookie_data, - struct page *page, - void *data, - int error) +static void afs_readpage_read_complete(void *cookie_data, + struct page *page, + void *data, + int error) { _enter("%p,%p,%p,%d", cookie_data, page, data, error); @@ -109,10 +109,10 @@ static void afs_file_readpage_read_complete(void *cookie_data, * deal with notification that a page was written to the cache */ #ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_write_complete(void *cookie_data, - struct page *page, - void *data, - int error) +static void afs_readpage_write_complete(void *cookie_data, + struct page *page, + void *data, + int error) { _enter("%p,%p,%p,%d", cookie_data, page, data, error); @@ -121,9 +121,9 @@ static void afs_file_readpage_write_complete(void *cookie_data, #endif /* - * AFS read page from file (or symlink) + * AFS read page from file, directory or symlink */ -static int afs_file_readpage(struct file *file, struct page *page) +static int afs_readpage(struct file *file, struct page *page) { struct afs_vnode *vnode; struct inode *inode; @@ -218,31 +218,14 @@ error: return ret; } -/* - * get a page cookie for the specified page - */ -#ifdef AFS_CACHING_SUPPORT -int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie) -{ - int ret; - - _enter(""); - ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); - - _leave(" = %d", ret); - return ret; -} -#endif - /* * invalidate part or all of a page */ -static void afs_file_invalidatepage(struct page *page, unsigned long offset) +static void afs_invalidatepage(struct page *page, unsigned long offset) { int ret = 1; - _enter("{%lu},%lu", page->index, offset); + kenter("{%lu},%lu", page->index, offset); BUG_ON(!PageLocked(page)); @@ -274,23 +257,17 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset) /* * release a page and cleanup its private data */ -static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) +static int afs_releasepage(struct page *page, gfp_t gfp_flags) { - struct cachefs_page *pageio; + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - _enter("{%lu},%x", page->index, gfp_flags); + _enter("{{%x:%u}[%lu],%lx},%x", + vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, + gfp_flags); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache, page); -#endif - - pageio = (struct cachefs_page *) page_private(page); set_page_private(page, 0); ClearPagePrivate(page); - - kfree(pageio); } _leave(" = 0"); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index e54e6c2ad343..1e65fee36413 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -206,7 +206,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, struct afs_call *call; __be32 *bp; - _enter(",%x,{%x:%d},,", + _enter(",%x,{%x:%u},,", key_serial(key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); @@ -265,25 +265,20 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->offset = 0; call->unmarshall++; - if (call->count < PAGE_SIZE) { - page = call->reply3; - buffer = kmap_atomic(page, KM_USER0); - memset(buffer + PAGE_SIZE - call->count, 0, - call->count); - kunmap_atomic(buffer, KM_USER0); - } - /* extract the returned data */ case 2: _debug("extract data"); - page = call->reply3; - buffer = kmap_atomic(page, KM_USER0); - ret = afs_extract_data(call, skb, last, buffer, call->count); - kunmap_atomic(buffer, KM_USER0); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; + if (call->count > 0) { + page = call->reply3; + buffer = kmap_atomic(page, KM_USER0); + ret = afs_extract_data(call, skb, last, buffer, + call->count); + kunmap_atomic(buffer, KM_USER0); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } } call->offset = 0; @@ -318,6 +313,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, if (!last) return 0; + if (call->count < PAGE_SIZE) { + _debug("clear"); + page = call->reply3; + buffer = kmap_atomic(page, KM_USER0); + memset(buffer + call->count, 0, PAGE_SIZE - call->count); + kunmap_atomic(buffer, KM_USER0); + } + _leave(" = 0 [done]"); return 0; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c184a4ee5995..9c984cc42cc9 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -125,7 +125,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, struct inode *inode; int ret; - _enter(",{%u,%u,%u},,", fid->vid, fid->vnode, fid->unique); + _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique); as = sb->s_fs_info; data.volume = as->volume; @@ -203,6 +203,19 @@ bad_inode: return ERR_PTR(ret); } +/* + * mark the data attached to an inode as obsolete due to a write on the server + * - might also want to ditch all the outstanding writes and dirty pages + */ +void afs_zap_data(struct afs_vnode *vnode) +{ + kenter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode); + + /* nuke all the non-dirty pages that aren't locked, mapped or being + * written back */ + invalidate_remote_inode(&vnode->vfs_inode); +} + /* * validate a vnode/inode * - there are several things we need to check @@ -258,10 +271,8 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) /* if the vnode's data version number changed then its contents are * different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode); - invalidate_remote_inode(&vnode->vfs_inode); - } + if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) + afs_zap_data(vnode); clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); @@ -278,7 +289,7 @@ error_unlock: /* * read the attributes of an inode */ -int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, +int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; @@ -301,7 +312,7 @@ void afs_clear_inode(struct inode *inode) vnode = AFS_FS_I(inode); - _enter("{%x:%d.%d} v=%u x=%u t=%u }", + _enter("{%x:%u.%d} v=%u x=%u t=%u }", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d90c158cd934..9feb5c59d8fc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -433,10 +433,6 @@ extern const struct file_operations afs_file_operations; extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); -#ifdef AFS_CACHING_SUPPORT -extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **); -#endif - /* * fsclient.c */ @@ -474,10 +470,9 @@ extern int afs_fs_rename(struct afs_server *, struct key *, extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_inode_getattr(struct vfsmount *, struct dentry *, - struct kstat *); -extern void afs_zap_permits(struct rcu_head *); +extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern void afs_clear_inode(struct inode *); /* @@ -533,6 +528,7 @@ extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, */ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); +extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern int afs_permission(struct inode *, int, struct nameidata *); @@ -726,6 +722,21 @@ do { \ } \ } while(0) +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ + if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "AFS: Assertion failed\n"); \ + printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \ + (unsigned long)(L), (unsigned long)(N), \ + (unsigned long)(H)); \ + BUG(); \ + } \ +} while(0) + #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ @@ -758,6 +769,10 @@ do { \ do { \ } while(0) +#define ASSERTRANGE(L, OP1, N, OP2, H) \ +do { \ +} while(0) + #define ASSERTIF(C, X) \ do { \ } while(0) diff --git a/fs/afs/main.c b/fs/afs/main.c index 80ec6fd19a73..f1f71ff7d5c6 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -149,6 +149,7 @@ error_cache: afs_vlocation_purge(); afs_cell_purge(); afs_proc_cleanup(); + rcu_barrier(); printk(KERN_ERR "kAFS: failed to register: %d\n", ret); return ret; } @@ -176,6 +177,7 @@ static void __exit afs_exit(void) cachefs_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); + rcu_barrier(); } module_exit(afs_exit); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index cdb9792d8161..d1a889c40742 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code) { switch (abort_code) { case 13: return -EACCES; + case 27: return -EFBIG; case 30: return -EROFS; case VSALVAGE: return -EIO; case VNOVNODE: return -ENOENT; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 034fcfd4e330..a3684dcc76e7 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -36,7 +36,7 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .follow_link = afs_mntpt_follow_link, .readlink = page_readlink, - .getattr = afs_inode_getattr, + .getattr = afs_getattr, }; static LIST_HEAD(afs_vfsmounts); @@ -58,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) char *buf; int ret; - _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); + _enter("{%x:%u,%u}", + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); /* read the contents of the symlink into the pagecache */ page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); diff --git a/fs/afs/security.c b/fs/afs/security.c index f9f424d80458..e0ea88b63ebf 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -109,7 +109,7 @@ void afs_clear_permits(struct afs_vnode *vnode) { struct afs_permits *permits; - _enter("{%x}", vnode->fid.vnode); + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); mutex_lock(&vnode->permits_lock); permits = vnode->permits; @@ -132,7 +132,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) struct afs_vnode *auth_vnode; int count, loop; - _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order); + _enter("{%x:%u},%x,%lx", + vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); auth_vnode = afs_get_auth_inode(vnode, key); if (IS_ERR(auth_vnode)) { @@ -220,7 +221,8 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, bool valid; int loop, ret; - _enter(""); + _enter("{%x:%u},%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key)); auth_vnode = afs_get_auth_inode(vnode, key); if (IS_ERR(auth_vnode)) { @@ -268,9 +270,9 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, _leave(" = %d", ret); return ret; } + *_access = vnode->status.caller_access; } - *_access = vnode->status.caller_access; iput(&auth_vnode->vfs_inode); _leave(" = 0 [access %x]", *_access); return 0; @@ -288,7 +290,7 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd) struct key *key; int ret; - _enter("{{%x:%x},%lx},%x,", + _enter("{{%x:%u},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); key = afs_request_key(vnode->volume->cell); diff --git a/fs/afs/server.c b/fs/afs/server.c index 96bb23b476a2..231ae4150279 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -252,6 +252,9 @@ static void afs_destroy_server(struct afs_server *server) { _enter("%p", server); + ASSERTIF(server->cb_break_head != server->cb_break_tail, + delayed_work_pending(&server->cb_break_work)); + ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); ASSERTCMP(server->cb_promises.rb_node, ==, NULL); ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index a1904ab8426a..0e37f9949cb7 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -261,7 +261,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, DECLARE_WAITQUEUE(myself, current); - _enter("%s,{%u,%u,%u}", + _enter("%s,{%x:%u.%u}", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); @@ -389,7 +389,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,,,", + _enter("%s{%x:%u.%u},%x,,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -446,7 +446,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s,,", + _enter("%s{%x:%u.%u},%x,%s,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -502,7 +502,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s", + _enter("%s{%x:%u.%u},%x,%s", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -557,7 +557,7 @@ extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s", + _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", dvnode->volume->vlocation->vldb.name, dvnode->fid.vid, dvnode->fid.vnode, @@ -628,7 +628,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%x,%s,%s,,,", + _enter("%s{%x:%u.%u},%x,%s,%s,,,", vnode->volume->vlocation->vldb.name, vnode->fid.vid, vnode->fid.vnode, @@ -687,7 +687,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, struct afs_server *server; int ret; - _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s", + _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", orig_dvnode->volume->vlocation->vldb.name, orig_dvnode->fid.vid, orig_dvnode->fid.vnode, -- cgit v1.2.2 From 31143d5d515ece617ffccb7df5ff75e4d1dfa120 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 9 May 2007 02:33:46 -0700 Subject: AFS: implement basic file write support Implement support for writing to regular AFS files, including: (1) write (2) truncate (3) fsync, fdatasync (4) chmod, chown, chgrp, utime. AFS writeback attempts to batch writes into as chunks as large as it can manage up to the point that it writes back 65535 pages in one chunk or it meets a locked page. Furthermore, if a page has been written to using a particular key, then should another write to that page use some other key, the first write will be flushed before the second is allowed to take place. If the first write fails due to a security error, then the page will be scrapped and reread before the second write takes place. If a page is dirty and the callback on it is broken by the server, then the dirty data is not discarded (same behaviour as NFS). Shared-writable mappings are not supported by this patch. [akpm@linux-foundation.org: fix a bunch of warnings] Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/Makefile | 3 +- fs/afs/afs_fs.h | 2 + fs/afs/dir.c | 1 + fs/afs/file.c | 32 ++- fs/afs/fsclient.c | 332 +++++++++++++++++++++- fs/afs/inode.c | 47 ++- fs/afs/internal.h | 66 ++++- fs/afs/rxrpc.c | 80 +++++- fs/afs/super.c | 5 +- fs/afs/vnode.c | 107 +++++++ fs/afs/write.c | 835 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 1483 insertions(+), 27 deletions(-) create mode 100644 fs/afs/write.c (limited to 'fs') diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cf83e5d63512..73ce561f3ea0 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -22,6 +22,7 @@ kafs-objs := \ vlclient.o \ vlocation.o \ vnode.o \ - volume.o + volume.o \ + write.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 89e0d1650a72..2198006d2d03 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -18,6 +18,8 @@ enum AFS_FS_Operations { FSFETCHDATA = 130, /* AFS Fetch file data */ FSFETCHSTATUS = 132, /* AFS Fetch file status */ + FSSTOREDATA = 133, /* AFS Store file data */ + FSSTORESTATUS = 135, /* AFS Store file status */ FSREMOVEFILE = 136, /* AFS Remove a file */ FSCREATEFILE = 137, /* AFS Create a file */ FSRENAME = 138, /* AFS Rename or move a file or directory */ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 51f177af5dd6..2fb31276196b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -56,6 +56,7 @@ const struct inode_operations afs_dir_inode_operations = { .rename = afs_rename, .permission = afs_permission, .getattr = afs_getattr, + .setattr = afs_setattr, }; static struct dentry_operations afs_fs_dentry_operations = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 3eb3fc7b36be..3e25795e5a42 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -15,32 +15,43 @@ #include #include #include +#include #include "internal.h" static int afs_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned long offset); static int afs_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_launder_page(struct page *page); const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, + .aio_write = afs_file_write, .mmap = generic_file_readonly_mmap, .sendfile = generic_file_sendfile, + .fsync = afs_fsync, }; const struct inode_operations afs_file_inode_operations = { .getattr = afs_getattr, + .setattr = afs_setattr, .permission = afs_permission, }; const struct address_space_operations afs_fs_aops = { .readpage = afs_readpage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = afs_set_page_dirty, + .launder_page = afs_launder_page, .releasepage = afs_releasepage, .invalidatepage = afs_invalidatepage, + .prepare_write = afs_prepare_write, + .commit_write = afs_commit_write, + .writepage = afs_writepage, + .writepages = afs_writepages, }; /* @@ -230,11 +241,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset) BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache,page); -#endif - /* We release buffers only if the entire page is being * invalidated. * The get_block cached value has been unconditionally @@ -254,20 +260,34 @@ static void afs_invalidatepage(struct page *page, unsigned long offset) _leave(" = %d", ret); } +/* + * write back a dirty page + */ +static int afs_launder_page(struct page *page) +{ + _enter("{%lu}", page->index); + + return 0; +} + /* * release a page and cleanup its private data */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + struct afs_writeback *wb; _enter("{{%x:%u}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, gfp_flags); if (PagePrivate(page)) { + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); set_page_private(page, 0); ClearPagePrivate(page); + afs_put_writeback(wb); } _leave(" = 0"); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1e65fee36413..025b1903d9e1 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -33,8 +33,10 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) */ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, struct afs_file_status *status, - struct afs_vnode *vnode) + struct afs_vnode *vnode, + afs_dataversion_t *store_version) { + afs_dataversion_t expected_version; const __be32 *bp = *_bp; umode_t mode; u64 data_version, size; @@ -101,7 +103,11 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; } - if (status->data_version != data_version) { + expected_version = status->data_version; + if (store_version) + expected_version = *store_version; + + if (expected_version != data_version) { status->data_version = data_version; if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { _debug("vnode modified %llx on {%x:%u}", @@ -110,6 +116,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, set_bit(AFS_VNODE_MODIFIED, &vnode->flags); set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } + } else if (store_version) { + status->data_version = data_version; } } @@ -155,6 +163,44 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp, *_bp = bp; } +/* + * encode the requested attributes into an AFSStoreStatus block + */ +static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr) +{ + __be32 *bp = *_bp; + u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = attr->ia_mtime.tv_sec; + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = attr->ia_uid; + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = attr->ia_gid; + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + *bp++ = htonl(mask); + *bp++ = htonl(mtime); + *bp++ = htonl(owner); + *bp++ = htonl(group); + *bp++ = htonl(mode); + *bp++ = 0; /* segment size */ + *_bp = bp; +} + /* * deliver reply data to an FS.FetchStatus */ @@ -175,7 +221,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); if (call->reply2) xdr_decode_AFSVolSync(&bp, call->reply2); @@ -295,7 +341,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, } bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack(&bp, vnode); if (call->reply2) xdr_decode_AFSVolSync(&bp, call->reply2); @@ -479,8 +525,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSCallBack_raw(&bp, call->reply4); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ @@ -577,7 +623,7 @@ static int afs_deliver_fs_remove(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -660,8 +706,8 @@ static int afs_deliver_fs_link(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); - xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -749,8 +795,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); + xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -855,9 +901,10 @@ static int afs_deliver_fs_rename(struct afs_call *call, /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode); + xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL); if (new_dvnode != orig_dvnode) - xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode); + xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, + NULL); /* xdr_decode_AFSVolSync(&bp, call->replyX); */ _leave(" = 0 [done]"); @@ -939,3 +986,262 @@ int afs_fs_rename(struct afs_server *server, return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); } + +/* + * deliver reply data to an FS.StoreData + */ +static int afs_deliver_fs_store_data(struct afs_call *call, + struct sk_buff *skb, bool last) +{ + struct afs_vnode *vnode = call->reply; + const __be32 *bp; + + _enter(",,%u", last); + + afs_transfer_reply(call, skb); + if (!last) { + _leave(" = 0 [more]"); + return 0; + } + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, + &call->store_version); + /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + + afs_pages_written_back(vnode, call); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.StoreData operation type + */ +static const struct afs_call_type afs_RXFSStoreData = { + .name = "FS.StoreData", + .deliver = afs_deliver_fs_store_data, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * store a set of pages + */ +int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to, + const struct afs_wait_mode *wait_mode) +{ + struct afs_vnode *vnode = wb->vnode; + struct afs_call *call; + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + + size = to - offset; + if (first != last) + size += (loff_t)(last - first) << PAGE_SHIFT; + pos = (loff_t)first << PAGE_SHIFT; + pos += offset; + + i_size = i_size_read(&vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long) size, (unsigned long long) pos, + (unsigned long long) i_size); + + BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store + + call = afs_alloc_flat_call(&afs_RXFSStoreData, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->wb = wb; + call->key = wb->key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->mapping = vnode->vfs_inode.i_mapping; + call->first = first; + call->last = last; + call->first_offset = offset; + call->last_to = to; + call->send_pages = true; + call->store_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + *bp++ = 0; /* mask */ + *bp++ = 0; /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = 0; /* unix mode */ + *bp++ = 0; /* segment size */ + + *bp++ = htonl(pos); + *bp++ = htonl(size); + *bp++ = htonl(i_size); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * deliver reply data to an FS.StoreStatus + */ +static int afs_deliver_fs_store_status(struct afs_call *call, + struct sk_buff *skb, bool last) +{ + afs_dataversion_t *store_version; + struct afs_vnode *vnode = call->reply; + const __be32 *bp; + + _enter(",,%u", last); + + afs_transfer_reply(call, skb); + if (!last) { + _leave(" = 0 [more]"); + return 0; + } + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + + /* unmarshall the reply once we've received all of it */ + store_version = NULL; + if (call->operation_ID == FSSTOREDATA) + store_version = &call->store_version; + + bp = call->buffer; + xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); + /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.StoreStatus operation type + */ +static const struct afs_call_type afs_RXFSStoreStatus = { + .name = "FS.StoreStatus", + .deliver = afs_deliver_fs_store_status, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSStoreData_as_Status = { + .name = "FS.StoreData", + .deliver = afs_deliver_fs_store_status, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus + * so as to alter the file size also + */ +static int afs_fs_setattr_size(struct afs_server *server, struct key *key, + struct afs_vnode *vnode, struct iattr *attr, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(key), vnode->fid.vid, vnode->fid.vnode); + + ASSERT(attr->ia_valid & ATTR_SIZE); + ASSERTCMP(attr->ia_size, <=, 0xffffffff); // TODO: use 64-bit store + + call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, + (4 + 6 + 3) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->store_version = vnode->status.data_version + 1; + call->operation_ID = FSSTOREDATA; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTOREDATA); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + *bp++ = 0; /* position of start of write */ + *bp++ = 0; /* size of write */ + *bp++ = htonl(attr->ia_size); /* new file length */ + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} + +/* + * set the attributes on a file, using FS.StoreData if there's a change in file + * size, and FS.StoreStatus otherwise + */ +int afs_fs_setattr(struct afs_server *server, struct key *key, + struct afs_vnode *vnode, struct iattr *attr, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return afs_fs_setattr_size(server, key, vnode, attr, + wait_mode); + + _enter(",%x,{%x:%u},,", + key_serial(key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(&afs_RXFSStoreStatus, + (4 + 6) * 4, + (21 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->operation_ID = FSSTORESTATUS; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSSTORESTATUS); + *bp++ = htonl(vnode->fid.vid); + *bp++ = htonl(vnode->fid.vnode); + *bp++ = htonl(vnode->fid.unique); + + xdr_encode_AFS_StoreStatus(&bp, attr); + + return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); +} diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 9c984cc42cc9..515a5d12d8fb 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -209,7 +209,7 @@ bad_inode: */ void afs_zap_data(struct afs_vnode *vnode) { - kenter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode); /* nuke all the non-dirty pages that aren't locked, mapped or being * written back */ @@ -334,6 +334,7 @@ void afs_clear_inode(struct inode *inode) vnode->server = NULL; } + ASSERT(list_empty(&vnode->writebacks)); ASSERT(!vnode->cb_promised); #ifdef AFS_CACHING_SUPPORT @@ -350,3 +351,47 @@ void afs_clear_inode(struct inode *inode) _leave(""); } + +/* + * set the attributes of an inode + */ +int afs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct key *key; + int ret; + + _enter("{%x:%u},{n=%s},%x", + vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + attr->ia_valid); + + if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | + ATTR_MTIME))) { + _leave(" = 0 [unsupported]"); + return 0; + } + + /* flush any dirty data outstanding on a regular file */ + if (S_ISREG(vnode->vfs_inode.i_mode)) { + filemap_write_and_wait(vnode->vfs_inode.i_mapping); + afs_writeback_all(vnode); + } + + if (attr->ia_valid & ATTR_FILE) { + key = attr->ia_file->private_data; + } else { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto error; + } + } + + ret = afs_vnode_setattr(vnode, key, attr); + if (!(attr->ia_valid & ATTR_FILE)) + key_put(key); + +error: + _leave(" = %d", ret); + return ret; +} diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9feb5c59d8fc..a30d4fa768e3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -21,6 +21,7 @@ #define AFS_CELL_MAX_ADDRS 15 +struct pagevec; struct afs_call; typedef enum { @@ -75,12 +76,15 @@ struct afs_call { struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ void *request; /* request data (first part) */ - void *request2; /* request data (second part) */ + struct address_space *mapping; /* page set */ + struct afs_writeback *wb; /* writeback being performed */ void *buffer; /* reply receive buffer */ void *reply; /* reply buffer (first part) */ void *reply2; /* reply buffer (second part) */ void *reply3; /* reply buffer (third part) */ void *reply4; /* reply buffer (fourth part) */ + pgoff_t first; /* first page in mapping to deal with */ + pgoff_t last; /* last page in mapping to deal with */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -97,14 +101,18 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned reply_size; /* current size of reply */ + unsigned first_offset; /* offset into mapping[first] */ + unsigned last_to; /* amount of mapping[last] */ unsigned short offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ + bool send_pages; /* T if data from mapping should be sent */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ + afs_dataversion_t store_version; /* updated version expected from store */ }; struct afs_call_type { @@ -123,6 +131,32 @@ struct afs_call_type { void (*destructor)(struct afs_call *call); }; +/* + * record of an outstanding writeback on a vnode + */ +struct afs_writeback { + struct list_head link; /* link in vnode->writebacks */ + struct work_struct writer; /* work item to perform the writeback */ + struct afs_vnode *vnode; /* vnode to which this write applies */ + struct key *key; /* owner of this write */ + wait_queue_head_t waitq; /* completion and ready wait queue */ + pgoff_t first; /* first page in batch */ + pgoff_t point; /* last page in current store op */ + pgoff_t last; /* last page in batch (inclusive) */ + unsigned offset_first; /* offset into first page of start of write */ + unsigned to_last; /* offset into last page of end of write */ + int num_conflicts; /* count of conflicting writes in list */ + int usage; + bool conflicts; /* T if has dependent conflicts */ + enum { + AFS_WBACK_SYNCING, /* synchronisation being performed */ + AFS_WBACK_PENDING, /* write pending */ + AFS_WBACK_CONFLICTING, /* conflicting writes posted */ + AFS_WBACK_WRITING, /* writing back */ + AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ + } state __attribute__((packed)); +}; + /* * AFS superblock private data * - there's one superblock per volume @@ -305,6 +339,7 @@ struct afs_vnode { wait_queue_head_t update_waitq; /* status fetch waitqueue */ int update_cnt; /* number of outstanding ops that will update the * status */ + spinlock_t writeback_lock; /* lock for writebacks */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; #define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ @@ -316,6 +351,8 @@ struct afs_vnode { long acl_order; /* ACL check count (callback break count) */ + struct list_head writebacks; /* alterations in pagecache that need writing */ + /* outstanding callback notification on this file */ struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ @@ -463,6 +500,12 @@ extern int afs_fs_rename(struct afs_server *, struct key *, struct afs_vnode *, const char *, struct afs_vnode *, const char *, const struct afs_wait_mode *); +extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, + pgoff_t, pgoff_t, unsigned, unsigned, + const struct afs_wait_mode *); +extern int afs_fs_setattr(struct afs_server *, struct key *, + struct afs_vnode *, struct iattr *, + const struct afs_wait_mode *); /* * inode.c @@ -473,6 +516,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *, extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int afs_setattr(struct dentry *, struct iattr *); extern void afs_clear_inode(struct inode *); /* @@ -625,6 +669,9 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, struct afs_file_status *, struct afs_server **); extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, struct key *, const char *, const char *); +extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, + unsigned, unsigned); +extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); /* * volume.c @@ -641,6 +688,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); extern int afs_volume_release_fileserver(struct afs_vnode *, struct afs_server *, int); +/* + * write.c + */ +extern int afs_set_page_dirty(struct page *); +extern void afs_put_writeback(struct afs_writeback *); +extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); +extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); +extern int afs_writepage(struct page *, struct writeback_control *); +extern int afs_writepages(struct address_space *, struct writeback_control *); +extern int afs_write_inode(struct inode *, int); +extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); +extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, + unsigned long, loff_t); +extern int afs_writeback_all(struct afs_vnode *); +extern int afs_fsync(struct file *, struct dentry *, int); + + /*****************************************************************************/ /* * debug tracing diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 222c1a3abbb8..04189c47d6a0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -236,6 +236,70 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } +/* + * attach the data from a bunch of pages on an inode to a call + */ +int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov) +{ + struct page *pages[8]; + unsigned count, n, loop, offset, to; + pgoff_t first = call->first, last = call->last; + int ret; + + _enter(""); + + offset = call->first_offset; + call->first_offset = 0; + + do { + _debug("attach %lx-%lx", first, last); + + count = last - first + 1; + if (count > ARRAY_SIZE(pages)) + count = ARRAY_SIZE(pages); + n = find_get_pages_contig(call->mapping, first, count, pages); + ASSERTCMP(n, ==, count); + + loop = 0; + do { + msg->msg_flags = 0; + to = PAGE_SIZE; + if (first + loop >= last) + to = call->last_to; + else + msg->msg_flags = MSG_MORE; + iov->iov_base = kmap(pages[loop]) + offset; + iov->iov_len = to - offset; + offset = 0; + + _debug("- range %u-%u%s", + offset, to, msg->msg_flags ? " [more]" : ""); + msg->msg_iov = (struct iovec *) iov; + msg->msg_iovlen = 1; + + /* have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request */ + if (first + loop >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(call->rxcall, msg, + to - offset); + kunmap(pages[loop]); + if (ret < 0) + break; + } while (++loop < count); + first += count; + + for (loop = 0; loop < count; loop++) + put_page(pages[loop]); + if (ret < 0) + break; + } while (first < last); + + _leave(" = %d", ret); + return ret; +} + /* * initiate a call */ @@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); - _debug("MAKE %p{%s} [%d]", - call, call->type->name, atomic_read(&afs_outstanding_calls)); + _debug("____MAKE %p{%s,%x} [%d]____", + call, call->type->name, key_serial(call->key), + atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; INIT_WORK(&call->async_work, afs_process_async_call); @@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = 0; + msg.msg_flags = (call->send_pages ? MSG_MORE : 0); /* have to change the state *before* sending the last packet as RxRPC * might give us the reply before it returns from sending the * request */ - call->state = AFS_CALL_AWAIT_REPLY; + if (!call->send_pages) + call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); if (ret < 0) goto error_do_abort; + if (call->send_pages) { + ret = afs_send_pages(call, &msg, iov); + if (ret < 0) + goto error_do_abort; + } + /* at this point, an async call may no longer exist as it may have * already completed */ return wait_mode->wait(call); diff --git a/fs/afs/super.c b/fs/afs/super.c index 7030d76155fc..d24be334b608 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -50,6 +50,7 @@ static const struct super_operations afs_super_ops = { .statfs = simple_statfs, .alloc_inode = afs_alloc_inode, .drop_inode = generic_delete_inode, + .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, .umount_begin = afs_umount_begin, @@ -66,7 +67,7 @@ enum { afs_opt_vol, }; -static const match_table_t afs_options_list = { +static match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, @@ -459,7 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, init_waitqueue_head(&vnode->update_waitq); mutex_init(&vnode->permits_lock); mutex_init(&vnode->validate_lock); + spin_lock_init(&vnode->writeback_lock); spin_lock_init(&vnode->lock); + INIT_LIST_HEAD(&vnode->writebacks); INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); } } diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 0e37f9949cb7..ec814660209f 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -753,3 +753,110 @@ no_server: _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); return PTR_ERR(server); } + +/* + * write to a file + */ +int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_server *server; + struct afs_vnode *vnode = wb->vnode; + int ret; + + _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + vnode->volume->vlocation->vldb.name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(wb->key), + first, last, offset, to); + + /* this op will fetch the status */ + spin_lock(&vnode->lock); + vnode->update_cnt++; + spin_unlock(&vnode->lock); + + do { + /* pick a server to query */ + server = afs_volume_pick_fileserver(vnode); + if (IS_ERR(server)) + goto no_server; + + _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + + ret = afs_fs_store_data(server, wb, first, last, offset, to, + &afs_sync_call); + + } while (!afs_volume_release_fileserver(vnode, server, ret)); + + /* adjust the flags */ + if (ret == 0) { + afs_vnode_finalise_status_update(vnode, server); + afs_put_server(server); + } else { + afs_vnode_status_update_failed(vnode, ret); + } + + _leave(" = %d", ret); + return ret; + +no_server: + spin_lock(&vnode->lock); + vnode->update_cnt--; + ASSERTCMP(vnode->update_cnt, >=, 0); + spin_unlock(&vnode->lock); + return PTR_ERR(server); +} + +/* + * set the attributes on a file + */ +int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, + struct iattr *attr) +{ + struct afs_server *server; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->vlocation->vldb.name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + /* this op will fetch the status */ + spin_lock(&vnode->lock); + vnode->update_cnt++; + spin_unlock(&vnode->lock); + + do { + /* pick a server to query */ + server = afs_volume_pick_fileserver(vnode); + if (IS_ERR(server)) + goto no_server; + + _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); + + ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call); + + } while (!afs_volume_release_fileserver(vnode, server, ret)); + + /* adjust the flags */ + if (ret == 0) { + afs_vnode_finalise_status_update(vnode, server); + afs_put_server(server); + } else { + afs_vnode_status_update_failed(vnode, ret); + } + + _leave(" = %d", ret); + return ret; + +no_server: + spin_lock(&vnode->lock); + vnode->update_cnt--; + ASSERTCMP(vnode->update_cnt, >=, 0); + spin_unlock(&vnode->lock); + return PTR_ERR(server); +} diff --git a/fs/afs/write.c b/fs/afs/write.c new file mode 100644 index 000000000000..83ff29262816 --- /dev/null +++ b/fs/afs/write.c @@ -0,0 +1,835 @@ +/* handling of writes to regular files and writing back to the server + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "internal.h" + +static int afs_write_back_from_locked_page(struct afs_writeback *wb, + struct page *page); + +/* + * mark a page as having been made dirty and thus needing writeback + */ +int afs_set_page_dirty(struct page *page) +{ + _enter(""); + return __set_page_dirty_nobuffers(page); +} + +/* + * unlink a writeback record because its usage has reached zero + * - must be called with the wb->vnode->writeback_lock held + */ +static void afs_unlink_writeback(struct afs_writeback *wb) +{ + struct afs_writeback *front; + struct afs_vnode *vnode = wb->vnode; + + list_del_init(&wb->link); + if (!list_empty(&vnode->writebacks)) { + /* if an fsync rises to the front of the queue then wake it + * up */ + front = list_entry(vnode->writebacks.next, + struct afs_writeback, link); + if (front->state == AFS_WBACK_SYNCING) { + _debug("wake up sync"); + front->state = AFS_WBACK_COMPLETE; + wake_up(&front->waitq); + } + } +} + +/* + * free a writeback record + */ +static void afs_free_writeback(struct afs_writeback *wb) +{ + _enter(""); + key_put(wb->key); + kfree(wb); +} + +/* + * dispose of a reference to a writeback record + */ +void afs_put_writeback(struct afs_writeback *wb) +{ + struct afs_vnode *vnode = wb->vnode; + + _enter("{%d}", wb->usage); + + spin_lock(&vnode->writeback_lock); + if (--wb->usage == 0) + afs_unlink_writeback(wb); + else + wb = NULL; + spin_unlock(&vnode->writeback_lock); + if (wb) + afs_free_writeback(wb); +} + +/* + * partly or wholly fill a page that's under preparation for writing + */ +static int afs_fill_page(struct afs_vnode *vnode, struct key *key, + unsigned start, unsigned len, struct page *page) +{ + int ret; + + _enter(",,%u,%u", start, len); + + ASSERTCMP(start + len, <=, PAGE_SIZE); + + ret = afs_vnode_fetch_data(vnode, key, start, len, page); + if (ret < 0) { + if (ret == -ENOENT) { + _debug("got NOENT from server" + " - marking file deleted and stale"); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } + } + + _leave(" = %d", ret); + return ret; +} + +/* + * prepare a page for being written to + */ +static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, + struct key *key, unsigned offset, unsigned to) +{ + unsigned eof, tail, start, stop, len; + loff_t i_size, pos; + void *p; + int ret; + + _enter(""); + + if (offset == 0 && to == PAGE_SIZE) + return 0; + + p = kmap(page); + + i_size = i_size_read(&vnode->vfs_inode); + pos = (loff_t) page->index << PAGE_SHIFT; + if (pos >= i_size) { + /* partial write, page beyond EOF */ + _debug("beyond"); + if (offset > 0) + memset(p, 0, offset); + if (to < PAGE_SIZE) + memset(p + to, 0, PAGE_SIZE - to); + kunmap(page); + return 0; + } + + if (i_size - pos >= PAGE_SIZE) { + /* partial write, page entirely before EOF */ + _debug("before"); + tail = eof = PAGE_SIZE; + } else { + /* partial write, page overlaps EOF */ + eof = i_size - pos; + _debug("overlap %u", eof); + tail = max(eof, to); + if (tail < PAGE_SIZE) + memset(p + tail, 0, PAGE_SIZE - tail); + if (offset > eof) + memset(p + eof, 0, PAGE_SIZE - eof); + } + + kunmap(p); + + ret = 0; + if (offset > 0 || eof > to) { + /* need to fill one or two bits that aren't going to be written + * (cover both fillers in one read if there are two) */ + start = (offset > 0) ? 0 : to; + stop = (eof > to) ? eof : offset; + len = stop - start; + _debug("wr=%u-%u av=0-%u rd=%u@%u", + offset, to, eof, start, len); + ret = afs_fill_page(vnode, key, start, len, page); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * prepare to perform part of a write to a page + * - the caller holds the page locked, preventing it from being written out or + * modified by anyone else + */ +int afs_prepare_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + struct afs_writeback *candidate, *wb; + struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct key *key = file->private_data; + pgoff_t index; + int ret; + + _enter("{%x:%u},{%lx},%u,%u", + vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + + candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); + if (!candidate) + return -ENOMEM; + candidate->vnode = vnode; + candidate->first = candidate->last = page->index; + candidate->offset_first = offset; + candidate->to_last = to; + candidate->usage = 1; + candidate->state = AFS_WBACK_PENDING; + init_waitqueue_head(&candidate->waitq); + + if (!PageUptodate(page)) { + _debug("not up to date"); + ret = afs_prepare_page(vnode, page, key, offset, to); + if (ret < 0) { + kfree(candidate); + _leave(" = %d [prep]", ret); + return ret; + } + SetPageUptodate(page); + } + +try_again: + index = page->index; + spin_lock(&vnode->writeback_lock); + + /* see if this page is already pending a writeback under a suitable key + * - if so we can just join onto that one */ + wb = (struct afs_writeback *) page_private(page); + if (wb) { + if (wb->key == key && wb->state == AFS_WBACK_PENDING) + goto subsume_in_current_wb; + goto flush_conflicting_wb; + } + + if (index > 0) { + /* see if we can find an already pending writeback that we can + * append this page to */ + list_for_each_entry(wb, &vnode->writebacks, link) { + if (wb->last == index - 1 && wb->key == key && + wb->state == AFS_WBACK_PENDING) + goto append_to_previous_wb; + } + } + + list_add_tail(&candidate->link, &vnode->writebacks); + candidate->key = key_get(key); + spin_unlock(&vnode->writeback_lock); + SetPagePrivate(page); + set_page_private(page, (unsigned long) candidate); + _leave(" = 0 [new]"); + return 0; + +subsume_in_current_wb: + _debug("subsume"); + ASSERTRANGE(wb->first, <=, index, <=, wb->last); + if (index == wb->first && offset < wb->offset_first) + wb->offset_first = offset; + if (index == wb->last && to > wb->to_last) + wb->to_last = to; + spin_unlock(&vnode->writeback_lock); + kfree(candidate); + _leave(" = 0 [sub]"); + return 0; + +append_to_previous_wb: + _debug("append into %lx-%lx", wb->first, wb->last); + wb->usage++; + wb->last++; + wb->to_last = to; + spin_unlock(&vnode->writeback_lock); + SetPagePrivate(page); + set_page_private(page, (unsigned long) wb); + kfree(candidate); + _leave(" = 0 [app]"); + return 0; + + /* the page is currently bound to another context, so if it's dirty we + * need to flush it before we can use the new context */ +flush_conflicting_wb: + _debug("flush conflict"); + if (wb->state == AFS_WBACK_PENDING) + wb->state = AFS_WBACK_CONFLICTING; + spin_unlock(&vnode->writeback_lock); + if (PageDirty(page)) { + ret = afs_write_back_from_locked_page(wb, page); + if (ret < 0) { + afs_put_writeback(candidate); + _leave(" = %d", ret); + return ret; + } + } + + /* the page holds a ref on the writeback record */ + afs_put_writeback(wb); + set_page_private(page, 0); + ClearPagePrivate(page); + goto try_again; +} + +/* + * finalise part of a write to a page + */ +int afs_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + loff_t i_size, maybe_i_size; + + _enter("{%x:%u},{%lx},%u,%u", + vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); + + maybe_i_size = (loff_t) page->index << PAGE_SHIFT; + maybe_i_size += to; + + i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) { + spin_lock(&vnode->writeback_lock); + i_size = i_size_read(&vnode->vfs_inode); + if (maybe_i_size > i_size) + i_size_write(&vnode->vfs_inode, maybe_i_size); + spin_unlock(&vnode->writeback_lock); + } + + set_page_dirty(page); + + if (PageDirty(page)) + _debug("dirtied"); + + return 0; +} + +/* + * kill all the pages in the given range + */ +static void afs_kill_pages(struct afs_vnode *vnode, bool error, + pgoff_t first, pgoff_t last) +{ + struct pagevec pv; + unsigned count, loop; + + _enter("{%x:%u},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv, 0); + + do { + _debug("kill %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + ClearPageUptodate(pv.pages[loop]); + if (error) + SetPageError(pv.pages[loop]); + end_page_writeback(pv.pages[loop]); + } + + __pagevec_release(&pv); + } while (first < last); + + _leave(""); +} + +/* + * synchronously write back the locked page and any subsequent non-locked dirty + * pages also covered by the same writeback record + */ +static int afs_write_back_from_locked_page(struct afs_writeback *wb, + struct page *primary_page) +{ + struct page *pages[8], *page; + unsigned long count; + unsigned n, offset, to; + pgoff_t start, first, last; + int loop, ret; + + _enter(",%lx", primary_page->index); + + count = 1; + if (!clear_page_dirty_for_io(primary_page)) + BUG(); + if (test_set_page_writeback(primary_page)) + BUG(); + + /* find all consecutive lockable dirty pages, stopping when we find a + * page that is not immediately lockable, is not dirty or is missing, + * or we reach the end of the range */ + start = primary_page->index; + if (start >= wb->last) + goto no_more; + start++; + do { + _debug("more %lx [%lx]", start, count); + n = wb->last - start + 1; + if (n > ARRAY_SIZE(pages)) + n = ARRAY_SIZE(pages); + n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, + start, n, pages); + _debug("fgpc %u", n); + if (n == 0) + goto no_more; + if (pages[0]->index != start) { + for (n--; n >= 0; n--) + put_page(pages[n]); + goto no_more; + } + + for (loop = 0; loop < n; loop++) { + page = pages[loop]; + if (page->index > wb->last) + break; + if (TestSetPageLocked(page)) + break; + if (!PageDirty(page) || + page_private(page) != (unsigned long) wb) { + unlock_page(page); + break; + } + if (!clear_page_dirty_for_io(page)) + BUG(); + if (test_set_page_writeback(page)) + BUG(); + unlock_page(page); + put_page(page); + } + count += loop; + if (loop < n) { + for (; loop < n; loop++) + put_page(pages[loop]); + goto no_more; + } + + start += loop; + } while (start <= wb->last && count < 65536); + +no_more: + /* we now have a contiguous set of dirty pages, each with writeback set + * and the dirty mark cleared; the first page is locked and must remain + * so, all the rest are unlocked */ + first = primary_page->index; + last = first + count - 1; + + offset = (first == wb->first) ? wb->offset_first : 0; + to = (last == wb->last) ? wb->to_last : PAGE_SIZE; + + _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); + + ret = afs_vnode_store_data(wb, first, last, offset, to); + if (ret < 0) { + switch (ret) { + case -EDQUOT: + case -ENOSPC: + set_bit(AS_ENOSPC, + &wb->vnode->vfs_inode.i_mapping->flags); + break; + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + afs_kill_pages(wb->vnode, true, first, last); + set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags); + break; + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_kill_pages(wb->vnode, false, first, last); + break; + default: + break; + } + } else { + ret = count; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * write a page back to the server + * - the caller locked the page for us + */ +int afs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = page->mapping->backing_dev_info; + struct afs_writeback *wb; + int ret; + + _enter("{%lx},", page->index); + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || !PageDirty(page)) { + unlock_page(page); + return 0; + } + + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); + + ret = afs_write_back_from_locked_page(wb, page); + unlock_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return 0; + } + + wbc->nr_to_write -= ret; + if (wbc->nonblocking && bdi_write_congested(bdi)) + wbc->encountered_congestion = 1; + + _leave(" = 0"); + return 0; +} + +/* + * write a region of pages back to the server + */ +int afs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t index, pgoff_t end, pgoff_t *_next) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + struct afs_writeback *wb; + struct page *page; + int ret, n; + + _enter(",,%lx,%lx,", index, end); + + do { + n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!n) + break; + + _debug("wback %lx", page->index); + + if (page->index > end) { + *_next = index; + page_cache_release(page); + _leave(" = 0 [%lx]", *_next); + return 0; + } + + /* at this point we hold neither mapping->tree_lock nor lock on + * the page itself: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled back from + * swapper_space to tmpfs file mapping + */ + lock_page(page); + + if (page->mapping != mapping) { + unlock_page(page); + page_cache_release(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || !PageDirty(page)) { + unlock_page(page); + continue; + } + + wb = (struct afs_writeback *) page_private(page); + ASSERT(wb != NULL); + + spin_lock(&wb->vnode->writeback_lock); + wb->state = AFS_WBACK_WRITING; + spin_unlock(&wb->vnode->writeback_lock); + + ret = afs_write_back_from_locked_page(wb, page); + unlock_page(page); + page_cache_release(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } + + wbc->nr_to_write -= ret; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + break; + } + + cond_resched(); + } while (index < end && wbc->nr_to_write > 0); + + *_next = index; + _leave(" = 0 [%lx]", *_next); + return 0; +} + +/* + * write some of the pending data back to the server + */ +int afs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + pgoff_t start, end, next; + int ret; + + _enter(""); + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + _leave(" = 0 [congest]"); + return 0; + } + + if (wbc->range_cyclic) { + start = mapping->writeback_index; + end = -1; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && + !(wbc->nonblocking && wbc->encountered_congestion)) + ret = afs_writepages_region(mapping, wbc, 0, start, + &next); + mapping->writeback_index = next; + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT); + ret = afs_writepages_region(mapping, wbc, 0, end, &next); + if (wbc->nr_to_write > 0) + mapping->writeback_index = next; + } else { + start = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + ret = afs_writepages_region(mapping, wbc, start, end, &next); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * write an inode back + */ +int afs_write_inode(struct inode *inode, int sync) +{ + struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; + + _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + + ret = 0; + if (sync) { + ret = filemap_fdatawait(inode->i_mapping); + if (ret < 0) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * completion of write to server + */ +void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) +{ + struct afs_writeback *wb = call->wb; + struct pagevec pv; + unsigned count, loop; + pgoff_t first = call->first, last = call->last; + bool free_wb; + + _enter("{%x:%u},{%lx-%lx}", + vnode->fid.vid, vnode->fid.vnode, first, last); + + ASSERT(wb != NULL); + + pagevec_init(&pv, 0); + + do { + _debug("attach %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(call->mapping, first, count, + pv.pages); + ASSERTCMP(pv.nr, ==, count); + + spin_lock(&vnode->writeback_lock); + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + end_page_writeback(page); + if (page_private(page) == (unsigned long) wb) { + set_page_private(page, 0); + ClearPagePrivate(page); + wb->usage--; + } + } + free_wb = false; + if (wb->usage == 0) { + afs_unlink_writeback(wb); + free_wb = true; + } + spin_unlock(&vnode->writeback_lock); + first += count; + if (free_wb) { + afs_free_writeback(wb); + wb = NULL; + } + + __pagevec_release(&pv); + } while (first < last); + + _leave(""); +} + +/* + * write to an AFS file + */ +ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct dentry *dentry = iocb->ki_filp->f_path.dentry; + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + ssize_t result; + size_t count = iov_length(iov, nr_segs); + int ret; + + _enter("{%x.%u},{%zu},%lu,", + vnode->fid.vid, vnode->fid.vnode, count, nr_segs); + + if (IS_SWAPFILE(&vnode->vfs_inode)) { + printk(KERN_INFO + "AFS: Attempt to write to active swap file!\n"); + return -EBUSY; + } + + if (!count) + return 0; + + result = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (IS_ERR_VALUE(result)) { + _leave(" = %zd", result); + return result; + } + + /* return error values for O_SYNC and IS_SYNC() */ + if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { + ret = afs_fsync(iocb->ki_filp, dentry, 1); + if (ret < 0) + result = ret; + } + + _leave(" = %zd", result); + return result; +} + +/* + * flush the vnode to the fileserver + */ +int afs_writeback_all(struct afs_vnode *vnode) +{ + struct address_space *mapping = vnode->vfs_inode.i_mapping; + struct writeback_control wbc = { + .bdi = mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_writepages = 1, + .range_cyclic = 1, + }; + int ret; + + _enter(""); + + ret = mapping->a_ops->writepages(mapping, &wbc); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + + _leave(" = %d", ret); + return ret; +} + +/* + * flush any dirty pages for this process, and check for write errors. + * - the return status from this call provides a reliable indication of + * whether any write errors occurred for this process. + */ +int afs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct afs_writeback *wb, *xwb; + struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + int ret; + + _enter("{%x:%u},{n=%s},%d", + vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + datasync); + + /* use a writeback record as a marker in the queue - when this reaches + * the front of the queue, all the outstanding writes are either + * completed or rejected */ + wb = kzalloc(sizeof(*wb), GFP_KERNEL); + if (!wb) + return -ENOMEM; + wb->vnode = vnode; + wb->first = 0; + wb->last = -1; + wb->offset_first = 0; + wb->to_last = PAGE_SIZE; + wb->usage = 1; + wb->state = AFS_WBACK_SYNCING; + init_waitqueue_head(&wb->waitq); + + spin_lock(&vnode->writeback_lock); + list_for_each_entry(xwb, &vnode->writebacks, link) { + if (xwb->state == AFS_WBACK_PENDING) + xwb->state = AFS_WBACK_CONFLICTING; + } + list_add_tail(&wb->link, &vnode->writebacks); + spin_unlock(&vnode->writeback_lock); + + /* push all the outstanding writebacks to the server */ + ret = afs_writeback_all(vnode); + if (ret < 0) { + afs_put_writeback(wb); + _leave(" = %d [wb]", ret); + return ret; + } + + /* wait for the preceding writes to actually complete */ + ret = wait_event_interruptible(wb->waitq, + wb->state == AFS_WBACK_COMPLETE || + vnode->writebacks.next == &wb->link); + afs_put_writeback(wb); + _leave(" = %d", ret); + return ret; +} -- cgit v1.2.2 From a9df62c7585e6caa1e7d2425b2b14460ec3afc20 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 9 May 2007 02:33:54 -0700 Subject: aio: use flush_work() Migrate AIO over to use flush_work(). Cc: "Maciej W. Rozycki" Cc: David Howells Cc: Zach Brown Cc: Benjamin LaHaise Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b97ab8028b6d..d18690bb03e9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -346,10 +346,9 @@ void fastcall exit_aio(struct mm_struct *mm) wait_for_all_aios(ctx); /* - * this is an overkill, but ensures we don't leave - * the ctx on the aio_wq + * Ensure we don't leave the ctx on the aio_wq */ - flush_workqueue(aio_wq); + flush_work(aio_wq, &ctx->wq.work); if (1 != atomic_read(&ctx->users)) printk(KERN_DEBUG @@ -372,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); - flush_workqueue(aio_wq); + flush_work(aio_wq, &ctx->wq.work); aio_free_ring(ctx); mmdrop(ctx->mm); ctx->mm = NULL; -- cgit v1.2.2 From 28e53bddf814485699a4142bc056fd37d4e11dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:22 -0700 Subject: unify flush_work/flush_work_keventd and rename it to cancel_work_sync flush_work(wq, work) doesn't need the first parameter, we can use cwq->wq (this was possible from the very beginnig, I missed this). So we can unify flush_work_keventd and flush_work. Also, rename flush_work() to cancel_work_sync() and fix all callers. Perhaps this is not the best name, but "flush_work" is really bad. (akpm: this is why the earlier patches bypassed maintainers) Signed-off-by: Oleg Nesterov Cc: Jeff Garzik Cc: "David S. Miller" Cc: Jens Axboe Cc: Tejun Heo Cc: Auke Kok , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index d18690bb03e9..ac1c1587aa02 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -348,7 +348,7 @@ void fastcall exit_aio(struct mm_struct *mm) /* * Ensure we don't leave the ctx on the aio_wq */ - flush_work(aio_wq, &ctx->wq.work); + cancel_work_sync(&ctx->wq.work); if (1 != atomic_read(&ctx->users)) printk(KERN_DEBUG @@ -371,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); - flush_work(aio_wq, &ctx->wq.work); + cancel_work_sync(&ctx->wq.work); aio_free_ring(ctx); mmdrop(ctx->mm); ctx->mm = NULL; -- cgit v1.2.2 From 8842c9655b2b7f0e8e6c50a773b649e5d8a57678 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 9 May 2007 02:34:46 -0700 Subject: remove nfs4_acl_add_ace() nfs4_acl_add_ace() can now be removed. Signed-off-by: Adrian Bunk Acked-by: Neil Brown Acked-by: J. Bruce Fields Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4acl.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 673a53c014a3..cc3b7badd486 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -137,7 +137,6 @@ struct ace_container { static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); struct nfs4_acl * nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, @@ -785,21 +784,6 @@ nfs4_acl_new(int n) return acl; } -void -nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, - int whotype, uid_t who) -{ - struct nfs4_ace *ace = acl->aces + acl->naces; - - ace->type = type; - ace->flag = flag; - ace->access_mask = access_mask; - ace->whotype = whotype; - ace->who = who; - - acl->naces++; -} - static struct { char *string; int stringlen; @@ -851,6 +835,5 @@ nfs4_acl_write_who(int who, char *p) } EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_add_ace); EXPORT_SYMBOL(nfs4_acl_get_whotype); EXPORT_SYMBOL(nfs4_acl_write_who); -- cgit v1.2.2 From f34b95689d2ce001c157b1604289ff240b4bdee0 Mon Sep 17 00:00:00 2001 From: Peter Staubach Date: Wed, 9 May 2007 02:34:48 -0700 Subject: The NFSv2/NFSv3 server does not handle zero length WRITE requests correctly The NFSv2 and NFSv3 servers do not handle WRITE requests for 0 bytes correctly. The specifications indicate that the server should accept the request, but it should mostly turn into a no-op. Currently, the server will return an XDR decode error, which it should not. Attached is a patch which addresses this issue. It also adds some boundary checking to ensure that the request contains as much data as was requested to be written. It also correctly handles an NFSv3 request which requests to write more data than the server has stated that it is prepared to handle. Previously, there was some support which looked like it should work, but wasn't quite right. Signed-off-by: Peter Staubach Acked-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs3xdr.c | 48 ++++++++++++++++++++++++++++++++++++++---------- fs/nfsd/nfsxdr.c | 46 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7e4bb0af24d7..43fb360784b7 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -369,7 +369,7 @@ int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeargs *args) { - unsigned int len, v, hdr; + unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh)) @@ -379,18 +379,47 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + /* + * The count must equal the amount of data passed. + */ + if (args->count != args->len) + return 0; + /* + * Check to make sure that we got the right number of + * bytes. + * + * If more than one page was used, then compute the length + * of the data in the request as the total size of the + * request minus the transport protocol headers minus the + * RPC protocol headers minus the NFS protocol fields + * already consumed. If the request fits into a single + * page, then compete the length of the data as the size + * of the NFS portion of the request minus the NFS + * protocol fields already consumed. + */ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - if (rqstp->rq_arg.len < hdr || - rqstp->rq_arg.len - hdr < len) + if (rqstp->rq_respages != rqstp->rq_pages + 1) { + dlen = rqstp->rq_arg.len - + (PAGE_SIZE - rqstp->rq_arg.head[0].iov_len) - hdr; + } else { + dlen = rqstp->rq_arg.head[0].iov_len - hdr; + } + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + */ + if (dlen != ((len + 3) & ~0x3)) return 0; + if (args->count > max_blocksize) { + args->count = max_blocksize; + len = args->len = max_blocksize; + } rqstp->rq_vec[0].iov_base = (void*)p; rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; - - if (len > max_blocksize) - len = max_blocksize; - v= 0; + v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; v++; @@ -398,9 +427,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, rqstp->rq_vec[v].iov_len = PAGE_SIZE; } rqstp->rq_vec[v].iov_len = len; - args->vlen = v+1; - - return args->count == args->len && rqstp->rq_vec[0].iov_len > 0; + args->vlen = v + 1; + return 1; } int diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 0c24b9e24fe8..6035e03655c6 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -284,8 +284,9 @@ int nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { - unsigned int len; + unsigned int len, hdr, dlen; int v; + if (!(p = decode_fh(p, &args->fh))) return 0; @@ -293,11 +294,42 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->offset = ntohl(*p++); /* offset */ p++; /* totalcount */ len = args->len = ntohl(*p++); - rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - - (((void*)p) - rqstp->rq_arg.head[0].iov_base); + /* + * The protocol specifies a maximum of 8192 bytes. + */ if (len > NFSSVC_MAXBLKSIZE_V2) - len = NFSSVC_MAXBLKSIZE_V2; + return 0; + + /* + * Check to make sure that we got the right number of + * bytes. + * + * If more than one page was used, then compute the length + * of the data in the request as the total size of the + * request minus the transport protocol headers minus the + * RPC protocol headers minus the NFS protocol fields + * already consumed. If the request fits into a single + * page, then compete the length of the data as the size + * of the NFS portion of the request minus the NFS + * protocol fields already consumed. + */ + hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; + if (rqstp->rq_respages != rqstp->rq_pages + 1) { + dlen = rqstp->rq_arg.len - + (PAGE_SIZE - rqstp->rq_arg.head[0].iov_len) - hdr; + } else { + dlen = rqstp->rq_arg.head[0].iov_len - hdr; + } + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + */ + if (dlen != ((len + 3) & ~0x3)) + return 0; + + rqstp->rq_vec[0].iov_base = (void*)p; + rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -306,8 +338,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, rqstp->rq_vec[v].iov_len = PAGE_SIZE; } rqstp->rq_vec[v].iov_len = len; - args->vlen = v+1; - return rqstp->rq_vec[0].iov_len > 0; + args->vlen = v + 1; + return 1; } int -- cgit v1.2.2 From 669716433598a1498049e75a84a5aaf69c8da173 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 May 2007 02:34:49 -0700 Subject: nfsd/nfs4state: remove unnecessary daemonize call Acked-by: Neil Brown Cc: Trond Myklebust Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 678f3be88ac0..3cc8ce422ab1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1326,8 +1326,6 @@ do_recall(void *__dp) { struct nfs4_delegation *dp = __dp; - daemonize("nfsv4-recall"); - nfsd4_cb_recall(dp); return 0; } -- cgit v1.2.2 From cd123012d99fde4759500fee611e724e4f3016e3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 May 2007 02:34:50 -0700 Subject: RPC: add wrapper for svc_reserve to account for checksum When the kernel calls svc_reserve to downsize the expected size of an RPC reply, it fails to account for the possibility of a checksum at the end of the packet. If a client mounts a NFSv2/3 with sec=krb5i/p, and does I/O then you'll generally see messages similar to this in the server's ring buffer: RPC request reserved 164 but used 208 While I was never able to verify it, I suspect that this problem is also the root cause of some oopses I've seen under these conditions: https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=227726 This is probably also a problem for other sec= types and for NFSv4. The large reserved size for NFSv4 compound packets seems to generally paper over the problem, however. This patch adds a wrapper for svc_reserve that accounts for the possibility of a checksum. It also fixes up the appropriate callers of svc_reserve to call the wrapper. For now, it just uses a hardcoded value that I determined via testing. That value may need to be revised upward as things change, or we may want to eventually add a new auth_op that attempts to calculate this somehow. Unfortunately, there doesn't seem to be a good way to reliably determine the expected checksum length prior to actually calculating it, particularly with schemes like spkm3. Signed-off-by: Jeff Layton Acked-by: Neil Brown Cc: Trond Myklebust Acked-by: J. Bruce Fields Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfsproc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7f5bad0393b1..eac82830bfd7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -177,7 +177,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, if (max_blocksize < resp->count) resp->count = max_blocksize; - svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_read(rqstp, &resp->fh, NULL, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 5cc2eec981b8..b2c7147aa921 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -155,7 +155,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, argp->count); argp->count = NFSSVC_MAXBLKSIZE_V2; } - svc_reserve(rqstp, (19<<2) + argp->count + 4); + svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, -- cgit v1.2.2 From 402acd29e552cb80109d1d5c0ada53f634465d87 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 May 2007 02:34:52 -0700 Subject: knfsd: avoid use of unitialised variables on error path when nfs exports We need to zero various parts of 'exp' before any 'goto out', otherwise when we go to free the contents... we die. Signed-off-by: Neil Brown Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 6f24768272a1..79bd03b8bbf8 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -469,6 +469,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) nd.dentry = NULL; exp.ex_path = NULL; + /* fs locations */ + exp.ex_fslocs.locations = NULL; + exp.ex_fslocs.locations_count = 0; + exp.ex_fslocs.migrated = 0; + + exp.ex_uuid = NULL; + if (mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; @@ -509,13 +516,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (exp.h.expiry_time == 0) goto out; - /* fs locations */ - exp.ex_fslocs.locations = NULL; - exp.ex_fslocs.locations_count = 0; - exp.ex_fslocs.migrated = 0; - - exp.ex_uuid = NULL; - /* flags */ err = get_int(&mesg, &an_int); if (err == -ENOENT) -- cgit v1.2.2 From f725b217b16e2cb1777c5a6e13c99f7913f1514a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 May 2007 02:34:56 -0700 Subject: knfsd: trivial makefile cleanup kbuild directly interprets -y as objects to build into a module, no need to assign it to the old foo-objs variable. Signed-off-by: Christoph Hellwig Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index ce341dc76d5e..9b118ee20193 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -11,4 +11,3 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o -nfsd-objs := $(nfsd-y) -- cgit v1.2.2 From 072f62ed85a71bbb3429a52678500ec9f9441e0d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 May 2007 02:34:57 -0700 Subject: knfsd: various nfsd xdr cleanups 1/ decode_sattr and decode_sattr3 never return NULL, so remove several checks for that. ditto for xdr_decode_hyper. 2/ replace some open coded XDR_QUADLEN calls with calls to XDR_QUADLEN 3/ in decode_writeargs, simply an 'if' to use a single calculation. .page_len is the length of that part of the packet that did not fit in the first page (the head). So the length of the data part is the remainder of the head, plus page_len. 3/ other minor cleanups. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs3xdr.c | 55 ++++++++++++++++++++----------------------------------- fs/nfsd/nfsxdr.c | 39 ++++++++++++++------------------------- 2 files changed, 34 insertions(+), 60 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 43fb360784b7..10f6e7dcf633 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -239,7 +239,7 @@ static __be32 * encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; - if (dentry && dentry->d_inode != NULL) { + if (dentry && dentry->d_inode) { int err; struct kstat stat; @@ -300,9 +300,9 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_sattr3(p, &args->attrs))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = decode_sattr3(p, &args->attrs); if ((args->check_guard = ntohl(*p++)) != 0) { struct timespec time; @@ -343,9 +343,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, int v,pn; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh)) - || !(p = xdr_decode_hyper(p, &args->offset))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = xdr_decode_hyper(p, &args->offset); len = args->count = ntohl(*p++); @@ -372,9 +372,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh)) - || !(p = xdr_decode_hyper(p, &args->offset))) + if (!(p = decode_fh(p, &args->fh))) return 0; + p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); args->stable = ntohl(*p++); @@ -388,29 +388,16 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, /* * Check to make sure that we got the right number of * bytes. - * - * If more than one page was used, then compute the length - * of the data in the request as the total size of the - * request minus the transport protocol headers minus the - * RPC protocol headers minus the NFS protocol fields - * already consumed. If the request fits into a single - * page, then compete the length of the data as the size - * of the NFS portion of the request minus the NFS - * protocol fields already consumed. */ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - if (rqstp->rq_respages != rqstp->rq_pages + 1) { - dlen = rqstp->rq_arg.len - - (PAGE_SIZE - rqstp->rq_arg.head[0].iov_len) - hdr; - } else { - dlen = rqstp->rq_arg.head[0].iov_len - hdr; - } + dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + - hdr; /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that * against the length which was actually received. */ - if (dlen != ((len + 3) & ~0x3)) + if (dlen != XDR_QUADLEN(len)*4) return 0; if (args->count > max_blocksize) { @@ -442,8 +429,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, switch (args->createmode = ntohl(*p++)) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - if (!(p = decode_sattr3(p, &args->attrs))) - return 0; + p = decode_sattr3(p, &args->attrs); break; case NFS3_CREATE_EXCLUSIVE: args->verf = p; @@ -459,10 +445,10 @@ int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_createargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len)) - || !(p = decode_sattr3(p, &args->attrs))) + if (!(p = decode_fh(p, &args->fh)) || + !(p = decode_filename(p, &args->name, &args->len))) return 0; + p = decode_sattr3(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -476,11 +462,12 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, char *old, *new; struct kvec *vec; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_sattr3(p, &args->attrs)) + if (!(p = decode_fh(p, &args->ffh)) || + !(p = decode_filename(p, &args->fname, &args->flen)) ) return 0; + p = decode_sattr3(p, &args->attrs); + /* now decode the pathname, which might be larger than the first page. * As we have to check for nul's anyway, we copy it into a new page * This page appears in the rq_res.pages list, but as pages_len is always @@ -530,10 +517,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p, args->ftype = ntohl(*p++); if (args->ftype == NF3BLK || args->ftype == NF3CHR - || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { - if (!(p = decode_sattr3(p, &args->attrs))) - return 0; - } + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) + p = decode_sattr3(p, &args->attrs); if (args->ftype == NF3BLK || args->ftype == NF3CHR) { args->major = ntohl(*p++); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6035e03655c6..cb3e7fadb772 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -231,9 +231,10 @@ int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_sattr(p, &args->attrs))) + p = decode_fh(p, &args->fh); + if (!p) return 0; + p = decode_sattr(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -303,29 +304,17 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, /* * Check to make sure that we got the right number of * bytes. - * - * If more than one page was used, then compute the length - * of the data in the request as the total size of the - * request minus the transport protocol headers minus the - * RPC protocol headers minus the NFS protocol fields - * already consumed. If the request fits into a single - * page, then compete the length of the data as the size - * of the NFS portion of the request minus the NFS - * protocol fields already consumed. */ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - if (rqstp->rq_respages != rqstp->rq_pages + 1) { - dlen = rqstp->rq_arg.len - - (PAGE_SIZE - rqstp->rq_arg.head[0].iov_len) - hdr; - } else { - dlen = rqstp->rq_arg.head[0].iov_len - hdr; - } + dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + - hdr; + /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that * against the length which was actually received. */ - if (dlen != ((len + 3) & ~0x3)) + if (dlen != XDR_QUADLEN(len)*4) return 0; rqstp->rq_vec[0].iov_base = (void*)p; @@ -346,10 +335,10 @@ int nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_createargs *args) { - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len)) - || !(p = decode_sattr(p, &args->attrs))) + if ( !(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) return 0; + p = decode_sattr(p, &args->attrs); return xdr_argsize_check(rqstp, p); } @@ -393,11 +382,11 @@ int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_symlinkargs *args) { - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_pathname(p, &args->tname, &args->tlen)) - || !(p = decode_sattr(p, &args->attrs))) + if ( !(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_pathname(p, &args->tname, &args->tlen))) return 0; + p = decode_sattr(p, &args->attrs); return xdr_argsize_check(rqstp, p); } -- cgit v1.2.2 From b41eeef14d7c73af6d16c7d02b7a939082a137ff Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 May 2007 02:34:57 -0700 Subject: knfsd: avoid Oops if buggy userspace performs confusing filehandle->dentry mapping When a lookup request arrives, nfsd uses information provided by userspace (mountd) to find the right filesystem. It then assumes that the same filehandle type as the incoming filehandle can be used to create an outgoing filehandle. However if mountd is buggy, or maybe just being creative, the filesystem may not support that filesystem type, and the kernel could oops, particularly if 'ex_uuid' is NULL but a FSID_UUID* filehandle type is used. So add some proper checking that the fsid version/type from the incoming filehandle is actually supportable, and ignore that information if it isn't supportable. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfsfh.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 739dd3c5c3b2..6ca2d24fc216 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -323,7 +323,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, * */ - u8 version = 1; + u8 version; u8 fsid_type = 0; struct inode * inode = dentry->d_inode; struct dentry *parent = dentry->d_parent; @@ -341,15 +341,59 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, * the reference filehandle (if it is in the same export) * or the export options. */ + retry: + version = 1; if (ref_fh && ref_fh->fh_export == exp) { version = ref_fh->fh_handle.fh_version; - if (version == 0xca) + fsid_type = ref_fh->fh_handle.fh_fsid_type; + + if (ref_fh == fhp) + fh_put(ref_fh); + ref_fh = NULL; + + switch (version) { + case 0xca: fsid_type = FSID_DEV; - else - fsid_type = ref_fh->fh_handle.fh_fsid_type; - /* We know this version/type works for this export - * so there is no need for further checks. + break; + case 1: + break; + default: + goto retry; + } + + /* Need to check that this type works for this + * export point. As the fsid -> filesystem mapping + * was guided by user-space, there is no guarantee + * that the filesystem actually supports that fsid + * type. If it doesn't we loop around again without + * ref_fh set. */ + switch(fsid_type) { + case FSID_DEV: + if (!old_valid_dev(ex_dev)) + goto retry; + /* FALL THROUGH */ + case FSID_MAJOR_MINOR: + case FSID_ENCODE_DEV: + if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags + & FS_REQUIRES_DEV)) + goto retry; + break; + case FSID_NUM: + if (! (exp->ex_flags & NFSEXP_FSID)) + goto retry; + break; + case FSID_UUID8: + case FSID_UUID16: + if (!root_export) + goto retry; + /* fall through */ + case FSID_UUID4_INUM: + case FSID_UUID16_INUM: + if (exp->ex_uuid == NULL) + goto retry; + break; + } } else if (exp->ex_uuid) { if (fhp->fh_maxsize >= 64) { if (root_export) -- cgit v1.2.2 From 01f2705daf5a36208e69d7cf95db9c330f843af6 Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:07 -0700 Subject: fs: convert core functions to zero_user_page It's very common for file systems to need to zero part or all of a page, the simplist way is just to use kmap_atomic() and memset(). There's actually a library function in include/linux/highmem.h that does exactly that, but it's confusingly named memclear_highpage_flush(), which is descriptive of *how* it does the work rather than what the *purpose* is. So this patchset renames the function to zero_user_page(), and calls it from the various places that currently open code it. This first patch introduces the new function call, and converts all the core kernel callsites, both the open-coded ones and the old memclear_highpage_flush() ones. Following this patch is a series of conversions for each file system individually, per AKPM, and finally a patch deprecating the old call. The diffstat below shows the entire patchset. [akpm@linux-foundation.org: fix a few things] Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 56 ++++++++++++-------------------------------------------- fs/direct-io.c | 8 ++------ fs/mpage.c | 15 +++++---------- 3 files changed, 19 insertions(+), 60 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index eb820b82a636..fc2d763a8d78 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1846,13 +1846,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (block_start >= to) break; if (buffer_new(bh)) { - void *kaddr; - clear_buffer_new(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_start, bh->b_size, KM_USER0); set_buffer_uptodate(bh); mark_buffer_dirty(bh); } @@ -1940,10 +1935,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, i * blocksize, blocksize, + KM_USER0); if (!err) set_buffer_uptodate(bh); continue; @@ -2086,7 +2079,6 @@ int cont_prepare_write(struct page *page, unsigned offset, long status; unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; - void *kaddr; while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { status = -ENOMEM; @@ -2108,10 +2100,8 @@ int cont_prepare_write(struct page *page, unsigned offset, PAGE_CACHE_SIZE, get_block); if (status) goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - flush_dcache_page(new_page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom, + KM_USER0); generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); unlock_page(new_page); page_cache_release(new_page); @@ -2138,10 +2128,7 @@ int cont_prepare_write(struct page *page, unsigned offset, if (status) goto out1; if (zerofrom < offset) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0); __block_commit_write(inode, page, zerofrom, offset); } return 0; @@ -2340,10 +2327,7 @@ failed: * Error recovery is pretty slack. Clear the page and mark it dirty * so we'll later zero out any blocks which _were_ allocated. */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); SetPageUptodate(page); set_page_dirty(page); return ret; @@ -2382,7 +2366,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; int ret; /* Is the page fully inside i_size? */ @@ -2413,10 +2396,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2437,7 +2417,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) unsigned to; struct page *page; const struct address_space_operations *a_ops = mapping->a_ops; - char *kaddr; int ret = 0; if ((offset & (blocksize - 1)) == 0) @@ -2451,10 +2430,8 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) to = (offset + blocksize) & ~(blocksize - 1); ret = a_ops->prepare_write(NULL, page, offset, to); if (ret == 0) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, + KM_USER0); /* * It would be more correct to call aops->commit_write() * here, but this is more efficient. @@ -2480,7 +2457,6 @@ int block_truncate_page(struct address_space *mapping, struct inode *inode = mapping->host; struct page *page; struct buffer_head *bh; - void *kaddr; int err; blocksize = 1 << inode->i_blkbits; @@ -2534,11 +2510,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - + zero_user_page(page, offset, length, KM_USER0); mark_buffer_dirty(bh); err = 0; @@ -2559,7 +2531,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, loff_t i_size = i_size_read(inode); const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - void *kaddr; /* Is the page fully inside i_size? */ if (page->index < end_index) @@ -2585,10 +2556,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); return __block_write_full_page(inode, page, get_block, wbc); } diff --git a/fs/direct-io.c b/fs/direct-io.c index d9d0833444f5..8aa2d8b04ef1 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -867,7 +867,6 @@ static int do_direct_IO(struct dio *dio) do_holes: /* Handle holes */ if (!buffer_mapped(map_bh)) { - char *kaddr; loff_t i_size_aligned; /* AKPM: eargh, -ENOTBLK is a hack */ @@ -888,11 +887,8 @@ do_holes: page_cache_release(page); goto out; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + (block_in_page << blkbits), - 0, 1 << blkbits); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, block_in_page << blkbits, + 1 << blkbits, KM_USER0); dio->block_in_file++; block_in_page++; goto next_block; diff --git a/fs/mpage.c b/fs/mpage.c index fa2441f57b41..0fb914fc2ee0 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -284,11 +284,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } if (first_hole != blocks_per_page) { - char *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + (first_hole << blkbits), 0, - PAGE_CACHE_SIZE - (first_hole << blkbits)); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, first_hole << blkbits, + PAGE_CACHE_SIZE - (first_hole << blkbits), + KM_USER0); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -576,14 +574,11 @@ page_is_mapped: * written out to the file." */ unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); - char *kaddr; if (page->index > end_index || !offset) goto confused; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, + KM_USER0); } /* -- cgit v1.2.2 From f36dca90e674a1a62cad810f630629c0008b2128 Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:07 -0700 Subject: affs: use zero_user_page Use zero_user_page() instead of open-coding it. Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/file.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/affs/file.c b/fs/affs/file.c index 4aa8079e71be..c8796906f584 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -628,11 +628,7 @@ static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned return err; } if (to < PAGE_CACHE_SIZE) { - char *kaddr = kmap_atomic(page, KM_USER0); - - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0); if (size > offset + to) { if (size < offset + PAGE_CACHE_SIZE) tmp = size & ~PAGE_CACHE_MASK; -- cgit v1.2.2 From 0c11d7a9e9e9793219baf715048c190a84bead57 Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:08 -0700 Subject: ext3: use zero_user_page Use zero_user_page() instead of open-coding it. Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index e1bb03171986..a6cb6171c3af 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1767,7 +1767,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, struct inode *inode = mapping->host; struct buffer_head *bh; int err = 0; - void *kaddr; blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); @@ -1779,10 +1778,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode) && PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); set_page_dirty(page); goto unlock; } @@ -1835,11 +1831,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - + zero_user_page(page, offset, length, KM_USER0); BUFFER_TRACE(bh, "zeroed end of block"); err = 0; -- cgit v1.2.2 From f2fff596955867d407cc7e8e426097bd9ad2be9b Mon Sep 17 00:00:00 2001 From: Nate Diller Date: Wed, 9 May 2007 02:35:09 -0700 Subject: reiserfs: use zero_user_page Use zero_user_page() instead of open-coding it. Signed-off-by: Nate Diller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/file.c | 39 +++++++++++---------------------------- fs/reiserfs/inode.c | 13 ++----------- 2 files changed, 13 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ab45db529c80..9e451a68580f 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1059,20 +1059,12 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode maping blocks, since there is none, so we just zero out remaining parts of first and last pages in write area (if needed) */ if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { - if (from != 0) { /* First page needs to be partially zeroed */ - char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); - memset(kaddr, 0, from); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[0]); - } - if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */ - char *kaddr = - kmap_atomic(prepared_pages[num_pages - 1], - KM_USER0); - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[num_pages - 1]); - } + if (from != 0) /* First page needs to be partially zeroed */ + zero_user_page(prepared_pages[0], 0, from, KM_USER0); + + if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */ + zero_user_page(prepared_pages[num_pages-1], to, + PAGE_CACHE_SIZE - to, KM_USER0); /* Since all blocks are new - use already calculated value */ return blocks; @@ -1199,13 +1191,9 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode ll_rw_block(READ, 1, &bh); *wait_bh++ = bh; } else { /* Not mapped, zero it */ - char *kaddr = - kmap_atomic(prepared_pages[0], - KM_USER0); - memset(kaddr + block_start, 0, - from - block_start); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[0]); + zero_user_page(prepared_pages[0], + block_start, + from - block_start, KM_USER0); set_buffer_uptodate(bh); } } @@ -1237,13 +1225,8 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode ll_rw_block(READ, 1, &bh); *wait_bh++ = bh; } else { /* Not mapped, zero it */ - char *kaddr = - kmap_atomic(prepared_pages - [num_pages - 1], - KM_USER0); - memset(kaddr + to, 0, block_end - to); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(prepared_pages[num_pages - 1]); + zero_user_page(prepared_pages[num_pages-1], + to, block_end - to, KM_USER0); set_buffer_uptodate(bh); } } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9fcbfe316977..1272d11399fb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2148,13 +2148,8 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) length = offset & (blocksize - 1); /* if we are not on a block boundary */ if (length) { - char *kaddr; - length = blocksize - length; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); if (buffer_mapped(bh) && bh->b_blocknr != 0) { mark_buffer_dirty(bh); } @@ -2370,7 +2365,6 @@ static int reiserfs_write_full_page(struct page *page, ** last byte in the file */ if (page->index >= end_index) { - char *kaddr; unsigned last_offset; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); @@ -2379,10 +2373,7 @@ static int reiserfs_write_full_page(struct page *page, unlock_page(page); return 0; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); } bh = head; block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); -- cgit v1.2.2 From 8bb7844286fb8c9fce6f65d8288aeb09d03a5e0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2007 02:35:10 -0700 Subject: Add suspend-related notifications for CPU hotplug Since nonboot CPUs are now disabled after tasks and devices have been frozen and the CPU hotplug infrastructure is used for this purpose, we need special CPU hotplug notifications that will help the CPU-hotplug-aware subsystems distinguish normal CPU hotplug events from CPU hotplug events related to a system-wide suspend or resume operation in progress. This patch introduces such notifications and causes them to be used during suspend and resume transitions. It also changes all of the CPU-hotplug-aware subsystems to take these notifications into consideration (for now they are handled in the same way as the corresponding "normal" ones). [oleg@tv-sign.ru: cleanups] Signed-off-by: Rafael J. Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- fs/xfs/xfs_mount.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index fc2d763a8d78..aecd057cd0e0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2946,7 +2946,7 @@ static void buffer_exit_cpu(int cpu) static int buffer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f5aa3ef855fb..a96bde6df96d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify( per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* Easy Case - initialize the area and locks, and * then rebalance when online does everything else for us. */ memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: xfs_icsb_lock(mp); xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); @@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify( xfs_icsb_unlock(mp); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* Disable all the counters, then fold the dead cpu's * count into the total on the global superblock and * re-enable the counters. */ -- cgit v1.2.2 From e62c2bba1fb7cf068eb78d731da46e4447a9efb1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2007 09:00:17 -0400 Subject: NFS: Fix a jiffie wraparound issue dentry verifiers are always set to the parent directory's cache_change_attribute. There is no reason to be testing for anything other than equality when we're trying to find out if the dentry has been checked since the last time the directory was modified. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 625d8e5fb39d..fced7d1d48de 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -650,12 +650,17 @@ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) */ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { + unsigned long verf; + if (IS_ROOT(dentry)) return 1; + verf = (unsigned long)dentry->d_fsdata; if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 - || nfs_attribute_timeout(dir)) + || nfs_attribute_timeout(dir) + || nfs_caches_unstable(dir) + || verf != NFS_I(dir)->cache_change_attribute) return 0; - return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); + return 1; } static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) @@ -665,8 +670,7 @@ static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) { - if (time_after(verf, (unsigned long)dentry->d_fsdata)) - nfs_set_verifier(dentry, verf); + nfs_set_verifier(dentry, verf); } /* -- cgit v1.2.2 From e70c490810dc683fad39e57cf00e69d5f120c542 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 9 May 2007 09:00:18 -0400 Subject: NFS: Remove redundant check in nfs_check_verifier() The check for nfs_attribute_timeout(dir) in nfs_check_verifier is redundant: nfs_lookup_revalidate() will already call nfs_revalidate_inode() on the parent dir when necessary. The only case where this is not done is the case of a negative dentry. Fix this case by moving up the revalidation code. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fced7d1d48de..469cf66e1dc0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -655,9 +655,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) if (IS_ROOT(dentry)) return 1; verf = (unsigned long)dentry->d_fsdata; - if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 - || nfs_attribute_timeout(dir) - || nfs_caches_unstable(dir) + if (nfs_caches_unstable(dir) || verf != NFS_I(dir)->cache_change_attribute) return 0; return 1; @@ -769,6 +767,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; + /* Revalidate parent directory attribute cache */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + goto out_zap_parent; + if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -782,10 +784,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) goto out_bad; } - /* Revalidate parent directory attribute cache */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) - goto out_zap_parent; - /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, nd)) -- cgit v1.2.2 From 6ce7dc940701cf3fde3c6e826a696b333092cbb1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 8 May 2007 18:23:28 -0400 Subject: NFS: NFS client underestimates how large an NFSv4 SETATTR reply can be The maximum size of an NFSv4 SETATTR compound reply should include the GETATTR operation that we send. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b8c28f2380a5..f1e2b8cdf33c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -224,7 +224,8 @@ static int nfs4_stat_to_errno(int); encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 3) + op_decode_hdr_maxsz + 3 + \ + nfs4_fattr_maxsz) #define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_fsinfo_maxsz) -- cgit v1.2.2 From e4cc6ee2e40bdd57990577b7f851fa2ca48edf47 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 8 May 2007 18:23:28 -0400 Subject: NFS: Clean up NFSv4 XDR error message Make it more useful for debugging purposes. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f1e2b8cdf33c..938f37166788 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2080,9 +2080,11 @@ out: #define READ_BUF(nbytes) do { \ p = xdr_inline_decode(xdr, nbytes); \ - if (!p) { \ - printk(KERN_WARNING "%s: reply buffer overflowed in line %d.", \ - __FUNCTION__, __LINE__); \ + if (unlikely(!p)) { \ + printk(KERN_INFO "%s: prematurely hit end of receive" \ + " buffer\n", __FUNCTION__); \ + printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ + __FUNCTION__, xdr->p, nbytes, xdr->end); \ return -EIO; \ } \ } while (0) -- cgit v1.2.2 From fee7f23feaf0845fdfd47d20cddc75652552fbb8 Mon Sep 17 00:00:00 2001 From: Milind Arun Choudhary Date: Thu, 26 Apr 2007 00:29:03 -0700 Subject: NFS: use __set_current_state() use __set_current_state(TASK_*) instead of current->state = TASK_*, in fs/nfs Signed-off-by: Milind Arun Choudhary Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 9d4a6b2d1996..d11eb055265c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -272,7 +272,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&idmap->idmap_im_lock); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&idmap->idmap_wq, &wq); mutex_lock(&idmap->idmap_im_lock); @@ -333,7 +333,7 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&idmap->idmap_im_lock); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&idmap->idmap_wq, &wq); mutex_lock(&idmap->idmap_im_lock); -- cgit v1.2.2 From 7a13e932281e7042a592f4f14db0b348199e7aac Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 26 Apr 2007 00:29:02 -0700 Subject: NFS: Kill the obsolete NFS_PARANOIA Signed-off-by: Jesper Juhl Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 17 ++--------------- fs/nfs/getroot.c | 1 - fs/nfs/inode.c | 3 --- fs/nfs/nfs2xdr.c | 1 - fs/nfs/pagelist.c | 7 ------- 5 files changed, 2 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 469cf66e1dc0..3df428816559 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -38,7 +38,6 @@ #include "delegation.h" #include "iostat.h" -#define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ static int nfs_opendir(struct inode *, struct file *); @@ -1362,11 +1361,6 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) atomic_read(&dentry->d_count)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); -#ifdef NFS_PARANOIA -if (!dentry->d_inode) -printk("NFS: silly-renaming %s/%s, negative dentry??\n", -dentry->d_parent->d_name.name, dentry->d_name.name); -#endif /* * We don't allow a dentry to be silly-renamed twice. */ @@ -1683,16 +1677,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode = NULL; /* instantiate the replacement target */ d_instantiate(new_dentry, NULL); - } else if (atomic_read(&new_dentry->d_count) > 1) { - /* dentry still busy? */ -#ifdef NFS_PARANOIA - printk("nfs_rename: target %s/%s busy, d_count=%d\n", - new_dentry->d_parent->d_name.name, - new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); -#endif + } else if (atomic_read(&new_dentry->d_count) > 1) + /* dentry still busy? */ goto out; - } } else drop_nlink(new_inode); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 234778576f09..d1cbf0a0fbb2 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -41,7 +41,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_CLIENT -#define NFS_PARANOIA 1 /* * get an NFS2/NFS3 root dentry from the root filehandle diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1e9a915d1fea..2a3fd9573207 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -48,7 +48,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_PARANOIA 1 static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); @@ -1075,10 +1074,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ -#ifdef NFS_PARANOIA printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); -#endif out_err: /* * No need to worry about unhashing the dentry, as the diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index abd9f8b48943..cd3ca7b5d3db 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -26,7 +26,6 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* #define NFS_PARANOIA 1 */ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 388950118f59..e12054c86d0d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -20,8 +20,6 @@ #include "internal.h" -#define NFS_PARANOIA 1 - static struct kmem_cache *nfs_page_cachep; static inline struct nfs_page * @@ -167,11 +165,6 @@ nfs_release_request(struct nfs_page *req) if (!atomic_dec_and_test(&req->wb_count)) return; -#ifdef NFS_PARANOIA - BUG_ON (!list_empty(&req->wb_list)); - BUG_ON (NFS_WBACK_BUSY(req)); -#endif - /* Release struct file or cached credential */ nfs_clear_request(req); put_nfs_open_context(req->wb_context); -- cgit v1.2.2