aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
author: Steven Whitehouse <swhiteho@redhat.com> 2006-02-08 06:50:51 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2006-02-08 06:50:51 -0500
commit: 18ec7d5c3f434aed9661ed10a9e1f48cdeb4981d (patch)
tree: a7161a4c4b3592052e6772e1c23849de16cac649 /fs/gfs2
parent: 257f9b4e97e9a6cceeb247cead92119a4396d37b (diff)
[GFS2] Make journaled data files identical to normal files on disk
This is a very large patch with a few still-to-be-resolved issues, so you might want to check out the previous head of the tree, since this one is known to be unstable. Fixes for the various bugs will be forthcoming shortly.

This patch removes the special data format which has been used up till now for journaled data files. Directories still retain the old format so that they will remain on-disk compatible with earlier releases. As a result you can now do the following with journaled data files:

1) mmap them
2) export them over NFS
3) convert to/from normal files whenever you want to (the zero length restriction is gone)

In addition, the level at which GFS' locking is done has changed for all files (since they all now use the page cache), such that the locking is done at the page cache level rather than at the level of the fs operations. This should mean that things like loopback mounts, and other things which touch the page cache directly, should now work.

Current known issues:

1. There is a lock mode inversion problem related to the resource group hold function which needs to be resolved.
2. Any significant amount of I/O causes an oops with an offset of hex 320 (NULL pointer dereference), which appears to be related to a journaled data buffer appearing on a list where it shouldn't be.
3. Direct I/O writes are disabled for the time being (they will reappear later).
4. There is probably a deadlock between the page lock and GFS' locks under certain combinations of mmap and fs operation I/O.
5. An issue relating to ref counting on internally used inodes causes a hang on umount (discovered before this patch, and not fixed by it).
6. One part of the directory metadata is different from GFS1 and will need to be resolved before the next release.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- fs/gfs2/Makefile | 1
-rw-r--r-- fs/gfs2/bmap.c | 59
-rw-r--r-- fs/gfs2/dir.c | 4
-rw-r--r-- fs/gfs2/dir.h | 2
-rw-r--r-- fs/gfs2/inode.h | 7
-rw-r--r-- fs/gfs2/jdata.c | 389
-rw-r--r-- fs/gfs2/jdata.h | 52
-rw-r--r-- fs/gfs2/log.c | 4
-rw-r--r-- fs/gfs2/lops.c | 280
-rw-r--r-- fs/gfs2/meta_io.c | 16
-rw-r--r-- fs/gfs2/ops_address.c | 260
-rw-r--r-- fs/gfs2/ops_file.c | 967
-rw-r--r-- fs/gfs2/ops_vm.c | 3
-rw-r--r-- fs/gfs2/page.c | 10
-rw-r--r-- fs/gfs2/quota.c | 114
-rw-r--r-- fs/gfs2/trans.c | 19
-rw-r--r-- fs/gfs2/trans.h | 1
-rw-r--r-- fs/gfs2/util.c | 3
18 files changed, 714 insertions, 1477 deletions
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 4e87b8661af0..88f927948113 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -10,7 +10,6 @@ gfs2-y := \
10 glock.o \ 10 glock.o \
11 glops.o \ 11 glops.o \
12 inode.o \ 12 inode.o \
13 jdata.o \
14 lm.o \ 13 lm.o \
15 log.o \ 14 log.o \
16 lops.o \ 15 lops.o \
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index bd194f645c52..4efcd8a39e98 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -18,12 +18,12 @@
18#include "bmap.h" 18#include "bmap.h"
19#include "glock.h" 19#include "glock.h"
20#include "inode.h" 20#include "inode.h"
21#include "jdata.h"
22#include "meta_io.h" 21#include "meta_io.h"
23#include "page.h" 22#include "page.h"
24#include "quota.h" 23#include "quota.h"
25#include "rgrp.h" 24#include "rgrp.h"
26#include "trans.h" 25#include "trans.h"
26#include "dir.h"
27 27
28/* This doesn't need to be that large as max 64 bit pointers in a 4k 28/* This doesn't need to be that large as max 64 bit pointers in a 4k
29 * block is 512, so __u16 is fine for that. It saves stack space to 29 * block is 512, so __u16 is fine for that. It saves stack space to
@@ -90,7 +90,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
90{ 90{
91 struct buffer_head *bh, *dibh; 91 struct buffer_head *bh, *dibh;
92 uint64_t block = 0; 92 uint64_t block = 0;
93 int journaled = gfs2_is_jdata(ip); 93 int isdir = gfs2_is_dir(ip);
94 int error; 94 int error;
95 95
96 down_write(&ip->i_rw_mutex); 96 down_write(&ip->i_rw_mutex);
@@ -103,10 +103,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
103 /* Get a free block, fill it with the stuffed data, 103 /* Get a free block, fill it with the stuffed data,
104 and write it out to disk */ 104 and write it out to disk */
105 105
106 if (journaled) { 106 if (isdir) {
107 block = gfs2_alloc_meta(ip); 107 block = gfs2_alloc_meta(ip);
108 108
109 error = gfs2_jdata_get_buffer(ip, block, 1, &bh); 109 error = gfs2_dir_get_buffer(ip, block, 1, &bh);
110 if (error) 110 if (error)
111 goto out_brelse; 111 goto out_brelse;
112 gfs2_buffer_copy_tail(bh, 112 gfs2_buffer_copy_tail(bh,
@@ -168,7 +168,7 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
168 if (ip->i_di.di_size > size) 168 if (ip->i_di.di_size > size)
169 size = ip->i_di.di_size; 169 size = ip->i_di.di_size;
170 170
171 if (gfs2_is_jdata(ip)) { 171 if (gfs2_is_dir(ip)) {
172 arr = sdp->sd_jheightsize; 172 arr = sdp->sd_jheightsize;
173 max = sdp->sd_max_jheight; 173 max = sdp->sd_max_jheight;
174 } else { 174 } else {
@@ -377,7 +377,7 @@ static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
377 return; 377 return;
378 378
379 if (height == ip->i_di.di_height - 1 && 379 if (height == ip->i_di.di_height - 1 &&
380 !gfs2_is_jdata(ip)) 380 !gfs2_is_dir(ip))
381 *block = gfs2_alloc_data(ip); 381 *block = gfs2_alloc_data(ip);
382 else 382 else
383 *block = gfs2_alloc_meta(ip); 383 *block = gfs2_alloc_meta(ip);
@@ -430,7 +430,7 @@ int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
430 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) 430 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
431 goto out; 431 goto out;
432 432
433 bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; 433 bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
434 434
435 height = calc_tree_height(ip, (lblock + 1) * bsize); 435 height = calc_tree_height(ip, (lblock + 1) * bsize);
436 if (ip->i_di.di_height < height) { 436 if (ip->i_di.di_height < height) {
@@ -618,7 +618,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
618 sm->sm_first = 0; 618 sm->sm_first = 0;
619 } 619 }
620 620
621 metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); 621 metadata = (height != ip->i_di.di_height - 1);
622 if (metadata) 622 if (metadata)
623 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 623 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
624 624
@@ -814,33 +814,6 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size)
814 return error; 814 return error;
815} 815}
816 816
817static int truncator_journaled(struct gfs2_inode *ip, uint64_t size)
818{
819 uint64_t lbn, dbn;
820 uint32_t off;
821 struct buffer_head *bh;
822 int new = 0;
823 int error;
824
825 lbn = size;
826 off = do_div(lbn, ip->i_sbd->sd_jbsize);
827
828 error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
829 if (error || !dbn)
830 return error;
831
832 error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh);
833 if (error)
834 return error;
835
836 gfs2_trans_add_bh(ip->i_gl, bh, 1);
837 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off);
838
839 brelse(bh);
840
841 return 0;
842}
843
844static int trunc_start(struct gfs2_inode *ip, uint64_t size) 817static int trunc_start(struct gfs2_inode *ip, uint64_t size)
845{ 818{
846 struct gfs2_sbd *sdp = ip->i_sbd; 819 struct gfs2_sbd *sdp = ip->i_sbd;
@@ -866,12 +839,7 @@ static int trunc_start(struct gfs2_inode *ip, uint64_t size)
866 error = 1; 839 error = 1;
867 840
868 } else { 841 } else {
869 if (journaled) { 842 if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
870 uint64_t junk = size;
871 /* we're just interested in the modulus */
872 if (do_div(junk, sdp->sd_jbsize))
873 error = truncator_journaled(ip, size);
874 } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
875 error = gfs2_block_truncate_page(ip->i_vnode->i_mapping); 843 error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);
876 844
877 if (!error) { 845 if (!error) {
@@ -900,10 +868,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
900 868
901 if (!size) 869 if (!size)
902 lblock = 0; 870 lblock = 0;
903 else if (gfs2_is_jdata(ip)) { 871 else
904 lblock = size - 1;
905 do_div(lblock, ip->i_sbd->sd_jbsize);
906 } else
907 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; 872 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
908 873
909 find_metapath(ip, lblock, &mp); 874 find_metapath(ip, lblock, &mp);
@@ -1051,7 +1016,7 @@ void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1051 struct gfs2_sbd *sdp = ip->i_sbd; 1016 struct gfs2_sbd *sdp = ip->i_sbd;
1052 unsigned int tmp; 1017 unsigned int tmp;
1053 1018
1054 if (gfs2_is_jdata(ip)) { 1019 if (gfs2_is_dir(ip)) {
1055 *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; 1020 *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
1056 *ind_blocks = 3 * (sdp->sd_max_jheight - 1); 1021 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1057 } else { 1022 } else {
@@ -1096,7 +1061,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
1096 return 0; 1061 return 0;
1097 } 1062 }
1098 1063
1099 if (gfs2_is_jdata(ip)) { 1064 if (gfs2_is_dir(ip)) {
1100 unsigned int bsize = sdp->sd_jbsize; 1065 unsigned int bsize = sdp->sd_jbsize;
1101 lblock = offset; 1066 lblock = offset;
1102 do_div(lblock, bsize); 1067 do_div(lblock, bsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ada283a0f5f3..c77e18048d98 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -86,8 +86,8 @@ typedef int (*leaf_call_t) (struct gfs2_inode *dip,
86 uint32_t index, uint32_t len, uint64_t leaf_no, 86 uint32_t index, uint32_t len, uint64_t leaf_no,
87 void *data); 87 void *data);
88 88
89static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, 89int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
90 struct buffer_head **bhp) 90 struct buffer_head **bhp)
91{ 91{
92 struct buffer_head *bh; 92 struct buffer_head *bh;
93 int error = 0; 93 int error = 0;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index ff6d1c597ee9..5b01497b3ab3 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -45,5 +45,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
45 45
46int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, 46int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
47 int *alloc_required); 47 int *alloc_required);
48int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
49 struct buffer_head **bhp);
48 50
49#endif /* __DIR_DOT_H__ */ 51#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index e42ae38d6778..214975c6bb22 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -20,6 +20,11 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip)
20 return ip->i_di.di_flags & GFS2_DIF_JDATA; 20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21} 21}
22 22
23static inline int gfs2_is_dir(struct gfs2_inode *ip)
24{
25 return S_ISDIR(ip->i_di.di_mode);
26}
27
23void gfs2_inode_attr_in(struct gfs2_inode *ip); 28void gfs2_inode_attr_in(struct gfs2_inode *ip);
24void gfs2_inode_attr_out(struct gfs2_inode *ip); 29void gfs2_inode_attr_out(struct gfs2_inode *ip);
25struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip); 30struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
@@ -72,9 +77,9 @@ static inline int gfs2_lookup_simple(struct inode *dip, char *name,
72 err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip); 77 err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip);
73 if (err == 0) { 78 if (err == 0) {
74 *ipp = gfs2_ip2v(ip); 79 *ipp = gfs2_ip2v(ip);
80 gfs2_inode_put(ip);
75 if (*ipp == NULL) 81 if (*ipp == NULL)
76 err = -ENOMEM; 82 err = -ENOMEM;
77 gfs2_inode_put(ip);
78 } 83 }
79 return err; 84 return err;
80} 85}
diff --git a/fs/gfs2/jdata.c b/fs/gfs2/jdata.c
deleted file mode 100644
index e43eaf133f10..000000000000
--- a/fs/gfs2/jdata.c
+++ /dev/null
@@ -1,389 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16#include <asm/uaccess.h>
17
18#include "gfs2.h"
19#include "bmap.h"
20#include "inode.h"
21#include "jdata.h"
22#include "meta_io.h"
23#include "trans.h"
24
25int gfs2_internal_read(struct gfs2_inode *ip,
26 struct file_ra_state *ra_state,
27 char *buf, loff_t *pos, unsigned size)
28{
29 return gfs2_jdata_read_mem(ip, buf, *pos, size);
30}
31
32int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
33 struct buffer_head **bhp)
34{
35 struct buffer_head *bh;
36 int error = 0;
37
38 if (new) {
39 bh = gfs2_meta_new(ip->i_gl, block);
40 gfs2_trans_add_bh(ip->i_gl, bh, 1);
41 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
42 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
43 } else {
44 error = gfs2_meta_read(ip->i_gl, block,
45 DIO_START | DIO_WAIT, &bh);
46 if (error)
47 return error;
48 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
49 brelse(bh);
50 return -EIO;
51 }
52 }
53
54 *bhp = bh;
55
56 return 0;
57}
58
59/**
60 * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
61 * @bh: The buffer to copy from, or NULL meaning zero the buffer
62 * @buf: The buffer to copy/zero
63 * @offset: The offset in the buffer to copy from
64 * @size: The amount of data to copy/zero
65 *
66 * Returns: errno
67 */
68
69int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
70 unsigned int size)
71{
72 if (bh)
73 memcpy(*buf, bh->b_data + offset, size);
74 else
75 memset(*buf, 0, size);
76 *buf += size;
77 return 0;
78}
79
80/**
81 * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
82 * @bh: The buffer
83 * @buf: The destination of the data
84 * @offset: The offset into the buffer
85 * @size: The amount of data to copy
86 *
87 * Returns: errno
88 */
89
90int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
91 unsigned int size)
92{
93 int error;
94
95 if (bh)
96 error = copy_to_user(*buf, bh->b_data + offset, size);
97 else
98 error = clear_user(*buf, size);
99
100 if (error)
101 error = -EFAULT;
102 else
103 *buf += size;
104
105 return error;
106}
107
108static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
109 unsigned int offset, unsigned int size,
110 read_copy_fn_t copy_fn)
111{
112 struct buffer_head *dibh;
113 int error;
114
115 error = gfs2_meta_inode_buffer(ip, &dibh);
116 if (!error) {
117 error = copy_fn(dibh, &buf,
118 offset + sizeof(struct gfs2_dinode), size);
119 brelse(dibh);
120 }
121
122 return (error) ? error : size;
123}
124
125/**
126 * gfs2_jdata_read - Read a jdata file
127 * @ip: The GFS2 Inode
128 * @buf: The buffer to place result into
129 * @offset: File offset to begin reading from
130 * @size: Amount of data to transfer
131 * @copy_fn: Function to actually perform the copy
132 *
133 * The @copy_fn only copies a maximum of a single block at once so
134 * we are safe calling it with int arguments. It is done so that
135 * we don't needlessly put 64bit arguments on the stack and it
136 * also makes the code in the @copy_fn nicer too.
137 *
138 * Returns: The amount of data actually copied or the error
139 */
140
141int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset,
142 unsigned int size, read_copy_fn_t copy_fn)
143{
144 struct gfs2_sbd *sdp = ip->i_sbd;
145 uint64_t lblock, dblock;
146 uint32_t extlen = 0;
147 unsigned int o;
148 int copied = 0;
149 int error = 0;
150
151 if (offset >= ip->i_di.di_size)
152 return 0;
153
154 if ((offset + size) > ip->i_di.di_size)
155 size = ip->i_di.di_size - offset;
156
157 if (!size)
158 return 0;
159
160 if (gfs2_is_stuffed(ip))
161 return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
162 copy_fn);
163
164 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
165 return -EINVAL;
166
167 lblock = offset;
168 o = do_div(lblock, sdp->sd_jbsize) +
169 sizeof(struct gfs2_meta_header);
170
171 while (copied < size) {
172 unsigned int amount;
173 struct buffer_head *bh;
174 int new;
175
176 amount = size - copied;
177 if (amount > sdp->sd_sb.sb_bsize - o)
178 amount = sdp->sd_sb.sb_bsize - o;
179
180 if (!extlen) {
181 new = 0;
182 error = gfs2_block_map(ip, lblock, &new,
183 &dblock, &extlen);
184 if (error)
185 goto fail;
186 }
187
188 if (extlen > 1)
189 gfs2_meta_ra(ip->i_gl, dblock, extlen);
190
191 if (dblock) {
192 error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
193 if (error)
194 goto fail;
195 dblock++;
196 extlen--;
197 } else
198 bh = NULL;
199
200 error = copy_fn(bh, &buf, o, amount);
201 brelse(bh);
202 if (error)
203 goto fail;
204
205 copied += amount;
206 lblock++;
207
208 o = sizeof(struct gfs2_meta_header);
209 }
210
211 return copied;
212
213 fail:
214 return (copied) ? copied : error;
215}
216
217/**
218 * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
219 * @bh: The buffer to copy to or clear
220 * @buf: The buffer to copy from
221 * @offset: The offset in the buffer to write to
222 * @size: The amount of data to write
223 *
224 * Returns: errno
225 */
226
227int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
228 const char **buf, unsigned int offset, unsigned int size)
229{
230 gfs2_trans_add_bh(ip->i_gl, bh, 1);
231 memcpy(bh->b_data + offset, *buf, size);
232
233 *buf += size;
234
235 return 0;
236}
237
238/**
239 * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
240 * @bh: The buffer to copy to or clear
241 * @buf: The buffer to copy from
242 * @offset: The offset in the buffer to write to
243 * @size: The amount of data to write
244 *
245 * Returns: errno
246 */
247
248int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
249 const char __user **buf, unsigned int offset, unsigned int size)
250{
251 int error = 0;
252
253 gfs2_trans_add_bh(ip->i_gl, bh, 1);
254 if (copy_from_user(bh->b_data + offset, *buf, size))
255 error = -EFAULT;
256 else
257 *buf += size;
258
259 return error;
260}
261
262static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
263 unsigned int offset, unsigned int size,
264 write_copy_fn_t copy_fn)
265{
266 struct buffer_head *dibh;
267 int error;
268
269 error = gfs2_meta_inode_buffer(ip, &dibh);
270 if (error)
271 return error;
272
273 error = copy_fn(ip,
274 dibh, &buf,
275 offset + sizeof(struct gfs2_dinode), size);
276 if (!error) {
277 if (ip->i_di.di_size < offset + size)
278 ip->i_di.di_size = offset + size;
279 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
280 gfs2_dinode_out(&ip->i_di, dibh->b_data);
281 }
282
283 brelse(dibh);
284
285 return (error) ? error : size;
286}
287
288/**
289 * gfs2_jdata_write - Write bytes to a file
290 * @ip: The GFS2 inode
291 * @buf: The buffer containing information to be written
292 * @offset: The file offset to start writing at
293 * @size: The amount of data to write
294 * @copy_fn: Function to do the actual copying
295 *
296 * Returns: The number of bytes correctly written or error code
297 */
298
299int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset,
300 unsigned int size, write_copy_fn_t copy_fn)
301{
302 struct gfs2_sbd *sdp = ip->i_sbd;
303 struct buffer_head *dibh;
304 uint64_t lblock, dblock;
305 uint32_t extlen = 0;
306 unsigned int o;
307 int copied = 0;
308 int error = 0;
309
310 if (!size)
311 return 0;
312
313 if (gfs2_is_stuffed(ip) &&
314 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
315 return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
316 copy_fn);
317
318 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
319 return -EINVAL;
320
321 if (gfs2_is_stuffed(ip)) {
322 error = gfs2_unstuff_dinode(ip, NULL, NULL);
323 if (error)
324 return error;
325 }
326
327 lblock = offset;
328 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
329
330 while (copied < size) {
331 unsigned int amount;
332 struct buffer_head *bh;
333 int new;
334
335 amount = size - copied;
336 if (amount > sdp->sd_sb.sb_bsize - o)
337 amount = sdp->sd_sb.sb_bsize - o;
338
339 if (!extlen) {
340 new = 1;
341 error = gfs2_block_map(ip, lblock, &new,
342 &dblock, &extlen);
343 if (error)
344 goto fail;
345 error = -EIO;
346 if (gfs2_assert_withdraw(sdp, dblock))
347 goto fail;
348 }
349
350 error = gfs2_jdata_get_buffer(ip, dblock,
351 (amount == sdp->sd_jbsize) ? 1 : new,
352 &bh);
353 if (error)
354 goto fail;
355
356 error = copy_fn(ip, bh, &buf, o, amount);
357 brelse(bh);
358 if (error)
359 goto fail;
360
361 copied += amount;
362 lblock++;
363 dblock++;
364 extlen--;
365
366 o = sizeof(struct gfs2_meta_header);
367 }
368
369 out:
370 error = gfs2_meta_inode_buffer(ip, &dibh);
371 if (error)
372 return error;
373
374 if (ip->i_di.di_size < offset + copied)
375 ip->i_di.di_size = offset + copied;
376 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
377
378 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
379 gfs2_dinode_out(&ip->i_di, dibh->b_data);
380 brelse(dibh);
381
382 return copied;
383
384 fail:
385 if (copied)
386 goto out;
387 return error;
388}
389
diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h
deleted file mode 100644
index 95e18fcb8f82..000000000000
--- a/fs/gfs2/jdata.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FILE_DOT_H__
11#define __FILE_DOT_H__
12
13int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
14 struct buffer_head **bhp);
15
16typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
17 unsigned int offset, unsigned int size);
18typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
19 struct buffer_head *bh, const char **buf,
20 unsigned int offset, unsigned int size);
21
22int gfs2_copy2mem(struct buffer_head *bh, char **buf,
23 unsigned int offset, unsigned int size);
24int gfs2_copy2user(struct buffer_head *bh, char __user **buf,
25 unsigned int offset, unsigned int size);
26int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf,
27 uint64_t offset, unsigned int size,
28 read_copy_fn_t copy_fn);
29
30int gfs2_copy_from_mem(struct gfs2_inode *ip,
31 struct buffer_head *bh, const char **buf,
32 unsigned int offset, unsigned int size);
33int gfs2_copy_from_user(struct gfs2_inode *ip,
34 struct buffer_head *bh, const char __user **buf,
35 unsigned int offset, unsigned int size);
36int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf,
37 uint64_t offset, unsigned int size,
38 write_copy_fn_t copy_fn);
39
40static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
41 uint64_t offset, unsigned int size)
42{
43 return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem);
44}
45
46static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf,
47 uint64_t offset, unsigned int size)
48{
49 return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem);
50}
51
52#endif /* __FILE_DOT_H__ */
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f6d00130f96f..9b4484d366ca 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -387,8 +387,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
387 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); 387 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
388 atomic_set(&bh->b_count, 1); 388 atomic_set(&bh->b_count, 1);
389 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); 389 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
390 set_bh_page(bh, virt_to_page(real->b_data), 390 set_bh_page(bh, real->b_page, bh_offset(real));
391 ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
392 bh->b_blocknr = blkno; 391 bh->b_blocknr = blkno;
393 bh->b_size = sdp->sd_sb.sb_bsize; 392 bh->b_size = sdp->sd_sb.sb_bsize;
394 bh->b_bdev = sdp->sd_vfs->s_bdev; 393 bh->b_bdev = sdp->sd_vfs->s_bdev;
@@ -634,6 +633,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
634 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); 633 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
635 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); 634 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
636 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); 635 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
636 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
637 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 637 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
638 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 638 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
639 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); 639 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a065f7667238..dd41863810d7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
428 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); 428 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
429} 429}
430 430
431/**
432 * databuf_lo_add - Add a databuf to the transaction.
433 *
434 * This is used in two distinct cases:
435 * i) In ordered write mode
436 * We put the data buffer on a list so that we can ensure that its
437 * synced to disk at the right time
438 * ii) In journaled data mode
439 * We need to journal the data block in the same way as metadata in
440 * the functions above. The difference is that here we have a tag
441 * which is two __be64's being the block number (as per meta data)
442 * and a flag which says whether the data block needs escaping or
443 * not. This means we need a new log entry for each 251 or so data
444 * blocks, which isn't an enormous overhead but twice as much as
445 * for normal metadata blocks.
446 */
431static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 447static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
432{ 448{
433 get_transaction->tr_touched = 1; 449 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
450 struct gfs2_trans *tr = get_transaction;
451 struct address_space *mapping = bd->bd_bh->b_page->mapping;
452 struct gfs2_inode *ip = get_v2ip(mapping->host);
434 453
454 tr->tr_touched = 1;
455 if (!list_empty(&bd->bd_list_tr) &&
456 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
457 tr->tr_num_buf++;
458 gfs2_trans_add_gl(bd->bd_gl);
459 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
460 gfs2_pin(sdp, bd->bd_bh);
461 } else {
462 clear_buffer_pinned(bd->bd_bh);
463 }
435 gfs2_log_lock(sdp); 464 gfs2_log_lock(sdp);
465 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
466 sdp->sd_log_num_jdata++;
436 sdp->sd_log_num_databuf++; 467 sdp->sd_log_num_databuf++;
437 list_add(&le->le_list, &sdp->sd_log_le_databuf); 468 list_add(&le->le_list, &sdp->sd_log_le_databuf);
438 gfs2_log_unlock(sdp); 469 gfs2_log_unlock(sdp);
439} 470}
440 471
472static int gfs2_check_magic(struct buffer_head *bh)
473{
474 struct page *page = bh->b_page;
475 void *kaddr;
476 __be32 *ptr;
477 int rv = 0;
478
479 kaddr = kmap_atomic(page, KM_USER0);
480 ptr = kaddr + bh_offset(bh);
481 if (*ptr == cpu_to_be32(GFS2_MAGIC))
482 rv = 1;
483 kunmap_atomic(page, KM_USER0);
484
485 return rv;
486}
487
488/**
489 * databuf_lo_before_commit - Scan the data buffers, writing as we go
490 *
491 * Here we scan through the lists of buffers and make the assumption
492 * that any buffer thats been pinned is being journaled, and that
493 * any unpinned buffer is an ordered write data buffer and therefore
494 * will be written back rather than journaled.
495 */
441static void databuf_lo_before_commit(struct gfs2_sbd *sdp) 496static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
442{ 497{
443 struct list_head *head = &sdp->sd_log_le_databuf;
444 LIST_HEAD(started); 498 LIST_HEAD(started);
445 struct gfs2_bufdata *bd; 499 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
446 struct buffer_head *bh; 500 struct buffer_head *bh = NULL;
501 unsigned int offset = sizeof(struct gfs2_log_descriptor);
502 struct gfs2_log_descriptor *ld;
503 unsigned int limit;
504 unsigned int total_dbuf = sdp->sd_log_num_databuf;
505 unsigned int total_jdata = sdp->sd_log_num_jdata;
506 unsigned int num, n;
507 __be64 *ptr;
447 508
448 while (!list_empty(head)) { 509 offset += (2*sizeof(__be64) - 1);
449 bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list); 510 offset &= ~(2*sizeof(__be64) - 1);
450 list_move(&bd->bd_le.le_list, &started); 511 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
451 512
452 gfs2_log_lock(sdp); 513 /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
453 bh = bd->bd_bh; 514 /*
515 * Start writing ordered buffers, write journaled buffers
516 * into the log along with a header
517 */
518 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list);
519 while(total_dbuf) {
520 num = total_jdata;
521 if (num > limit)
522 num = limit;
523 n = 0;
524 list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) {
525 gfs2_log_lock(sdp);
526 /* An ordered write buffer */
527 if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
528 list_move(&bd1->bd_le.le_list, &started);
529 if (bd1 == bd2) {
530 bd2 = NULL;
531 bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list);
532 }
533 total_dbuf--;
534 if (bd1->bd_bh) {
535 get_bh(bd1->bd_bh);
536 gfs2_log_unlock(sdp);
537 if (buffer_dirty(bd1->bd_bh)) {
538 wait_on_buffer(bd1->bd_bh);
539 ll_rw_block(WRITE, 1, &bd1->bd_bh);
540 }
541 brelse(bd1->bd_bh);
542 continue;
543 }
544 gfs2_log_unlock(sdp);
545 continue;
546 } else if (bd1->bd_bh) { /* A journaled buffer */
547 int magic;
548 gfs2_log_unlock(sdp);
549 /* printk(KERN_INFO "journaled buffer\n"); */
550 if (!bh) {
551 bh = gfs2_log_get_buf(sdp);
552 ld = (struct gfs2_log_descriptor *)bh->b_data;
553 ptr = (__be64 *)(bh->b_data + offset);
554 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
555 ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
556 ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
557 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
558 ld->ld_length = cpu_to_be32(num + 1);
559 ld->ld_data1 = cpu_to_be32(num);
560 ld->ld_data2 = cpu_to_be32(0);
561 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
562 }
563 magic = gfs2_check_magic(bd1->bd_bh);
564 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
565 *ptr++ = cpu_to_be64((__u64)magic);
566 clear_buffer_escaped(bd1->bd_bh);
567 if (unlikely(magic != 0))
568 set_buffer_escaped(bd1->bd_bh);
569 if (n++ > num)
570 break;
571 }
572 }
454 if (bh) { 573 if (bh) {
455 get_bh(bh); 574 set_buffer_dirty(bh);
456 gfs2_log_unlock(sdp); 575 ll_rw_block(WRITE, 1, &bh);
457 if (buffer_dirty(bh)) { 576 bh = NULL;
458 wait_on_buffer(bh); 577 }
459 ll_rw_block(WRITE, 1, &bh); 578 n = 0;
579 /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
580 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) {
581 if (!bd2->bd_bh)
582 continue;
583 /* copy buffer if it needs escaping */
584 if (unlikely(buffer_escaped(bd2->bd_bh))) {
585 void *kaddr;
586 struct page *page = bd2->bd_bh->b_page;
587 bh = gfs2_log_get_buf(sdp);
588 kaddr = kmap_atomic(page, KM_USER0);
589 memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize);
590 kunmap_atomic(page, KM_USER0);
591 *(__be32 *)bh->b_data = 0;
592 } else {
593 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
460 } 594 }
461 brelse(bh); 595 set_buffer_dirty(bh);
462 } else 596 ll_rw_block(WRITE, 1, &bh);
463 gfs2_log_unlock(sdp); 597 if (++n >= num)
598 break;
599 }
600 bh = NULL;
601 total_dbuf -= num;
602 total_jdata -= num;
464 } 603 }
465 604 /* printk(KERN_INFO "wait on ordered data buffers\n"); */
605 /* Wait on all ordered buffers */
466 while (!list_empty(&started)) { 606 while (!list_empty(&started)) {
467 bd = list_entry(started.next, struct gfs2_bufdata, 607 bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list);
468 bd_le.le_list); 608 list_del(&bd1->bd_le.le_list);
469 list_del(&bd->bd_le.le_list);
470 sdp->sd_log_num_databuf--; 609 sdp->sd_log_num_databuf--;
471 610
472 gfs2_log_lock(sdp); 611 gfs2_log_lock(sdp);
473 bh = bd->bd_bh; 612 bh = bd1->bd_bh;
474 if (bh) { 613 if (bh) {
475 set_v2bd(bh, NULL); 614 set_v2bd(bh, NULL);
476 gfs2_log_unlock(sdp); 615 gfs2_log_unlock(sdp);
@@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
479 } else 618 } else
480 gfs2_log_unlock(sdp); 619 gfs2_log_unlock(sdp);
481 620
482 kfree(bd); 621 kfree(bd1);
483 } 622 }
484 623
624 /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */
625 /* We've removed all the ordered write bufs here, so only jdata left */
626 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
627}
628
629static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
630 struct gfs2_log_descriptor *ld,
631 __be64 *ptr, int pass)
632{
633 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
634 struct gfs2_glock *gl = jd->jd_inode->i_gl;
635 unsigned int blks = be32_to_cpu(ld->ld_data1);
636 struct buffer_head *bh_log, *bh_ip;
637 uint64_t blkno;
638 uint64_t esc;
639 int error = 0;
640
641 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
642 return 0;
643
644 gfs2_replay_incr_blk(sdp, &start);
645 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
646 blkno = be64_to_cpu(*ptr++);
647 esc = be64_to_cpu(*ptr++);
648
649 sdp->sd_found_blocks++;
650
651 if (gfs2_revoke_check(sdp, blkno, start))
652 continue;
653
654 error = gfs2_replay_read_block(jd, start, &bh_log);
655 if (error)
656 return error;
657
658 bh_ip = gfs2_meta_new(gl, blkno);
659 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
660
661 /* Unescape */
662 if (esc) {
663 __be32 *eptr = (__be32 *)bh_ip->b_data;
664 *eptr = cpu_to_be32(GFS2_MAGIC);
665 }
666 mark_buffer_dirty(bh_ip);
667
668 brelse(bh_log);
669 brelse(bh_ip);
670 if (error)
671 break;
672
673 sdp->sd_replayed_blocks++;
674 }
675
676 return error;
677}
678
679/* FIXME: sort out accounting for log blocks etc. */
680
681static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
682{
683 struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
684
685 if (error) {
686 gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
687 return;
688 }
689 if (pass != 1)
690 return;
691
692 /* data sync? */
693 gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
694
695 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
696 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
697}
698
699static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
700{
701 struct list_head *head = &sdp->sd_log_le_databuf;
702 struct gfs2_bufdata *bd;
703
704 while (!list_empty(head)) {
705 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
706 list_del_init(&bd->bd_le.le_list);
707 sdp->sd_log_num_databuf--;
708 sdp->sd_log_num_jdata--;
709 gfs2_unpin(sdp, bd->bd_bh, ai);
710 brelse(bd->bd_bh);
711 kfree(bd);
712 }
485 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); 713 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
714 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
486} 715}
487 716
717
488struct gfs2_log_operations gfs2_glock_lops = { 718struct gfs2_log_operations gfs2_glock_lops = {
489 .lo_add = glock_lo_add, 719 .lo_add = glock_lo_add,
490 .lo_after_commit = glock_lo_after_commit, 720 .lo_after_commit = glock_lo_after_commit,
@@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = {
519 749
520struct gfs2_log_operations gfs2_databuf_lops = { 750struct gfs2_log_operations gfs2_databuf_lops = {
521 .lo_add = databuf_lo_add, 751 .lo_add = databuf_lo_add,
752 .lo_incore_commit = buf_lo_incore_commit,
522 .lo_before_commit = databuf_lo_before_commit, 753 .lo_before_commit = databuf_lo_before_commit,
754 .lo_after_commit = databuf_lo_after_commit,
755 .lo_scan_elements = databuf_lo_scan_elements,
756 .lo_after_scan = databuf_lo_after_scan,
523 .lo_name = "databuf" 757 .lo_name = "databuf"
524}; 758};
525 759
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index b6bd2ebfc2cc..ef58d43b67ee 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -547,10 +547,12 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta
547{ 547{
548 struct gfs2_bufdata *bd; 548 struct gfs2_bufdata *bd;
549 549
550 lock_page(bh->b_page); 550 if (meta)
551 lock_page(bh->b_page);
551 552
552 if (get_v2bd(bh)) { 553 if (get_v2bd(bh)) {
553 unlock_page(bh->b_page); 554 if (meta)
555 unlock_page(bh->b_page);
554 return; 556 return;
555 } 557 }
556 558
@@ -563,14 +565,16 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta
563 bd->bd_gl = gl; 565 bd->bd_gl = gl;
564 566
565 INIT_LIST_HEAD(&bd->bd_list_tr); 567 INIT_LIST_HEAD(&bd->bd_list_tr);
566 if (meta) 568 if (meta) {
567 lops_init_le(&bd->bd_le, &gfs2_buf_lops); 569 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
568 else 570 } else {
569 lops_init_le(&bd->bd_le, &gfs2_databuf_lops); 571 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
570 572 get_bh(bh);
573 }
571 set_v2bd(bh, bd); 574 set_v2bd(bh, bd);
572 575
573 unlock_page(bh->b_page); 576 if (meta)
577 unlock_page(bh->b_page);
574} 578}
575 579
576/** 580/**
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d611b2ad2e97..b14357e89421 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -20,13 +20,13 @@
20#include "bmap.h" 20#include "bmap.h"
21#include "glock.h" 21#include "glock.h"
22#include "inode.h" 22#include "inode.h"
23#include "jdata.h"
24#include "log.h" 23#include "log.h"
25#include "meta_io.h" 24#include "meta_io.h"
26#include "ops_address.h" 25#include "ops_address.h"
27#include "page.h" 26#include "page.h"
28#include "quota.h" 27#include "quota.h"
29#include "trans.h" 28#include "trans.h"
29#include "rgrp.h"
30 30
31/** 31/**
32 * gfs2_get_block - Fills in a buffer head with details about a block 32 * gfs2_get_block - Fills in a buffer head with details about a block
@@ -149,33 +149,55 @@ static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
149 * 149 *
150 * Returns: errno 150 * Returns: errno
151 * 151 *
152 * Use Linux VFS block_write_full_page() to write one page, 152 * Some of this is copied from block_write_full_page() although we still
153 * using GFS2's get_block_noalloc to find which blocks to write. 153 * call it to do most of the work.
154 */ 154 */
155 155
156static int gfs2_writepage(struct page *page, struct writeback_control *wbc) 156static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
157{ 157{
158 struct inode *inode = page->mapping->host;
158 struct gfs2_inode *ip = get_v2ip(page->mapping->host); 159 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
159 struct gfs2_sbd *sdp = ip->i_sbd; 160 struct gfs2_sbd *sdp = ip->i_sbd;
161 loff_t i_size = i_size_read(inode);
162 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
163 unsigned offset;
160 int error; 164 int error;
165 int done_trans = 0;
161 166
162 atomic_inc(&sdp->sd_ops_address); 167 atomic_inc(&sdp->sd_ops_address);
163
164 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { 168 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
165 unlock_page(page); 169 unlock_page(page);
166 return -EIO; 170 return -EIO;
167 } 171 }
168 if (get_transaction) { 172 if (get_transaction)
169 redirty_page_for_writepage(wbc, page); 173 goto out_ignore;
174
175 /* Is the page fully outside i_size? (truncate in progress) */
176 offset = i_size & (PAGE_CACHE_SIZE-1);
177 if (page->index >= end_index+1 || !offset) {
178 page->mapping->a_ops->invalidatepage(page, 0);
170 unlock_page(page); 179 unlock_page(page);
171 return 0; 180 return 0; /* don't care */
172 } 181 }
173 182
174 error = block_write_full_page(page, get_block_noalloc, wbc); 183 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
184 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
185 if (error)
186 goto out_ignore;
187 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
188 done_trans = 1;
189 }
175 190
191 error = block_write_full_page(page, get_block_noalloc, wbc);
192 if (done_trans)
193 gfs2_trans_end(sdp);
176 gfs2_meta_cache_flush(ip); 194 gfs2_meta_cache_flush(ip);
177
178 return error; 195 return error;
196
197out_ignore:
198 redirty_page_for_writepage(wbc, page);
199 unlock_page(page);
200 return 0;
179} 201}
180 202
181/** 203/**
@@ -227,40 +249,9 @@ static int zero_readpage(struct page *page)
227} 249}
228 250
229/** 251/**
230 * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
231 * @ip:
232 * @page: The page to read
233 *
234 * Returns: errno
235 */
236
237static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
238{
239 void *kaddr;
240 int ret;
241
242 kaddr = kmap(page);
243
244 ret = gfs2_jdata_read_mem(ip, kaddr,
245 (uint64_t)page->index << PAGE_CACHE_SHIFT,
246 PAGE_CACHE_SIZE);
247 if (ret >= 0) {
248 if (ret < PAGE_CACHE_SIZE)
249 memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
250 SetPageUptodate(page);
251 ret = 0;
252 }
253
254 kunmap(page);
255
256 unlock_page(page);
257
258 return ret;
259}
260
261/**
262 * gfs2_readpage - readpage with locking 252 * gfs2_readpage - readpage with locking
263 * @file: The file to read a page for 253 * @file: The file to read a page for. N.B. This may be NULL if we are
254 * reading an internal file.
264 * @page: The page to read 255 * @page: The page to read
265 * 256 *
266 * Returns: errno 257 * Returns: errno
@@ -270,31 +261,35 @@ static int gfs2_readpage(struct file *file, struct page *page)
270{ 261{
271 struct gfs2_inode *ip = get_v2ip(page->mapping->host); 262 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
272 struct gfs2_sbd *sdp = ip->i_sbd; 263 struct gfs2_sbd *sdp = ip->i_sbd;
264 struct gfs2_holder gh;
273 int error; 265 int error;
274 266
275 atomic_inc(&sdp->sd_ops_address); 267 atomic_inc(&sdp->sd_ops_address);
276 268
277 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) { 269 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
278 unlock_page(page); 270 error = gfs2_glock_nq_m_atime(1, &gh);
279 return -EOPNOTSUPP; 271 if (error)
280 } 272 goto out_unlock;
281 273
282 if (!gfs2_is_jdata(ip)) { 274 if (gfs2_is_stuffed(ip)) {
283 if (gfs2_is_stuffed(ip)) { 275 if (!page->index) {
284 if (!page->index) { 276 error = stuffed_readpage(ip, page);
285 error = stuffed_readpage(ip, page); 277 unlock_page(page);
286 unlock_page(page);
287 } else
288 error = zero_readpage(page);
289 } else 278 } else
290 error = mpage_readpage(page, gfs2_get_block); 279 error = zero_readpage(page);
291 } else 280 } else
292 error = jdata_readpage(ip, page); 281 error = mpage_readpage(page, gfs2_get_block);
293 282
294 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 283 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
295 error = -EIO; 284 error = -EIO;
296 285
286 gfs2_glock_dq_m(1, &gh);
287 gfs2_holder_uninit(&gh);
288out:
297 return error; 289 return error;
290out_unlock:
291 unlock_page(page);
292 goto out;
298} 293}
299 294
300/** 295/**
@@ -312,28 +307,82 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
312{ 307{
313 struct gfs2_inode *ip = get_v2ip(page->mapping->host); 308 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
314 struct gfs2_sbd *sdp = ip->i_sbd; 309 struct gfs2_sbd *sdp = ip->i_sbd;
310 unsigned int data_blocks, ind_blocks, rblocks;
311 int alloc_required;
315 int error = 0; 312 int error = 0;
313 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
314 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
315 struct gfs2_alloc *al;
316 316
317 atomic_inc(&sdp->sd_ops_address); 317 atomic_inc(&sdp->sd_ops_address);
318 318
319 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) 319 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
320 return -EOPNOTSUPP; 320 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
321 if (error)
322 goto out_uninit;
321 323
322 if (gfs2_is_stuffed(ip)) { 324 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
323 uint64_t file_size; 325
324 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; 326 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
327 if (error)
328 goto out_unlock;
325 329
326 if (file_size > sdp->sd_sb.sb_bsize - 330
327 sizeof(struct gfs2_dinode)) { 331 if (alloc_required) {
328 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, 332 al = gfs2_alloc_get(ip);
329 page); 333
330 if (!error) 334 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
331 error = block_prepare_write(page, from, to, 335 if (error)
332 gfs2_get_block); 336 goto out_alloc_put;
333 } else if (!PageUptodate(page)) 337
338 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
339 if (error)
340 goto out_qunlock;
341
342 al->al_requested = data_blocks + ind_blocks;
343 error = gfs2_inplace_reserve(ip);
344 if (error)
345 goto out_qunlock;
346 }
347
348 rblocks = RES_DINODE + ind_blocks;
349 if (gfs2_is_jdata(ip))
350 rblocks += data_blocks ? data_blocks : 1;
351 if (ind_blocks || data_blocks)
352 rblocks += RES_STATFS + RES_QUOTA;
353
354 error = gfs2_trans_begin(sdp, rblocks, 0);
355 if (error)
356 goto out;
357
358 if (gfs2_is_stuffed(ip)) {
359 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
360 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, page);
361 if (error)
362 goto out;
363 } else if (!PageUptodate(page)) {
334 error = stuffed_readpage(ip, page); 364 error = stuffed_readpage(ip, page);
335 } else 365 goto out;
336 error = block_prepare_write(page, from, to, gfs2_get_block); 366 }
367 }
368
369 error = block_prepare_write(page, from, to, gfs2_get_block);
370
371out:
372 if (error) {
373 gfs2_trans_end(sdp);
374 if (alloc_required) {
375 gfs2_inplace_release(ip);
376out_qunlock:
377 gfs2_quota_unlock(ip);
378out_alloc_put:
379 gfs2_alloc_put(ip);
380 }
381out_unlock:
382 gfs2_glock_dq_m(1, &ip->i_gh);
383out_uninit:
384 gfs2_holder_uninit(&ip->i_gh);
385 }
337 386
338 return error; 387 return error;
339} 388}
@@ -354,48 +403,73 @@ static int gfs2_commit_write(struct file *file, struct page *page,
354 struct inode *inode = page->mapping->host; 403 struct inode *inode = page->mapping->host;
355 struct gfs2_inode *ip = get_v2ip(inode); 404 struct gfs2_inode *ip = get_v2ip(inode);
356 struct gfs2_sbd *sdp = ip->i_sbd; 405 struct gfs2_sbd *sdp = ip->i_sbd;
357 int error; 406 int error = -EOPNOTSUPP;
407 struct buffer_head *dibh;
408 struct gfs2_alloc *al = &ip->i_alloc;;
358 409
359 atomic_inc(&sdp->sd_ops_address); 410 atomic_inc(&sdp->sd_ops_address);
360 411
412
413 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
414 goto fail_nounlock;
415
416 error = gfs2_meta_inode_buffer(ip, &dibh);
417 if (error)
418 goto fail_endtrans;
419
420 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
421
361 if (gfs2_is_stuffed(ip)) { 422 if (gfs2_is_stuffed(ip)) {
362 struct buffer_head *dibh;
363 uint64_t file_size; 423 uint64_t file_size;
364 void *kaddr; 424 void *kaddr;
365 425
366 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; 426 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
367 427
368 error = gfs2_meta_inode_buffer(ip, &dibh); 428 kaddr = kmap_atomic(page, KM_USER0);
369 if (error)
370 goto fail;
371
372 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
373
374 kaddr = kmap(page);
375 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, 429 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
376 (char *)kaddr + from, 430 (char *)kaddr + from, to - from);
377 to - from); 431 kunmap_atomic(page, KM_USER0);
378 kunmap(page);
379
380 brelse(dibh);
381 432
382 SetPageUptodate(page); 433 SetPageUptodate(page);
383 434
384 if (inode->i_size < file_size) 435 if (inode->i_size < file_size)
385 i_size_write(inode, file_size); 436 i_size_write(inode, file_size);
386 } else { 437 } else {
387 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) 438 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
388 gfs2_page_add_databufs(ip, page, from, to); 439 gfs2_page_add_databufs(ip, page, from, to);
389 error = generic_commit_write(file, page, from, to); 440 error = generic_commit_write(file, page, from, to);
390 if (error) 441 if (error)
391 goto fail; 442 goto fail;
392 } 443 }
393 444
445 if (ip->i_di.di_size < inode->i_size)
446 ip->i_di.di_size = inode->i_size;
447
448 gfs2_dinode_out(&ip->i_di, dibh->b_data);
449 brelse(dibh);
450 gfs2_trans_end(sdp);
451 if (al->al_requested) {
452 gfs2_inplace_release(ip);
453 gfs2_quota_unlock(ip);
454 gfs2_alloc_put(ip);
455 }
456 gfs2_glock_dq_m(1, &ip->i_gh);
457 gfs2_holder_uninit(&ip->i_gh);
394 return 0; 458 return 0;
395 459
396 fail: 460fail:
461 brelse(dibh);
462fail_endtrans:
463 gfs2_trans_end(sdp);
464 if (al->al_requested) {
465 gfs2_inplace_release(ip);
466 gfs2_quota_unlock(ip);
467 gfs2_alloc_put(ip);
468 }
469 gfs2_glock_dq_m(1, &ip->i_gh);
470 gfs2_holder_uninit(&ip->i_gh);
471fail_nounlock:
397 ClearPageUptodate(page); 472 ClearPageUptodate(page);
398
399 return error; 473 return error;
400} 474}
401 475
@@ -492,12 +566,16 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *io
492 566
493 atomic_inc(&sdp->sd_ops_address); 567 atomic_inc(&sdp->sd_ops_address);
494 568
495 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) || 569 if (gfs2_is_jdata(ip))
496 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
497 return -EINVAL; 570 return -EINVAL;
498 571
499 if (rw == WRITE && !get_transaction) 572 if (rw == WRITE) {
500 gb = get_blocks_noalloc; 573 return -EOPNOTSUPP; /* for now */
574 } else {
575 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
576 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
577 return -EINVAL;
578 }
501 579
502 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 580 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
503 offset, nr_segs, gb, NULL); 581 offset, nr_segs, gb, NULL);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 0f356fc4690c..56820b39a993 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -18,6 +18,7 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/gfs2_ioctl.h> 20#include <linux/gfs2_ioctl.h>
21#include <linux/fs.h>
21#include <asm/semaphore.h> 22#include <asm/semaphore.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
@@ -27,7 +28,6 @@
27#include "glock.h" 28#include "glock.h"
28#include "glops.h" 29#include "glops.h"
29#include "inode.h" 30#include "inode.h"
30#include "jdata.h"
31#include "lm.h" 31#include "lm.h"
32#include "log.h" 32#include "log.h"
33#include "meta_io.h" 33#include "meta_io.h"
@@ -67,10 +67,37 @@ struct filldir_reg {
67 void *fdr_opaque; 67 void *fdr_opaque;
68}; 68};
69 69
70typedef ssize_t(*do_rw_t) (struct file *file, 70static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
71 char __user *buf, 71 unsigned long offset, unsigned long size)
72 size_t size, loff_t *offset, 72{
73 unsigned int num_gh, struct gfs2_holder *ghs); 73 char *kaddr;
74 unsigned long count = desc->count;
75
76 if (size > count)
77 size = count;
78
79 kaddr = kmap(page);
80 memcpy(desc->arg.buf, kaddr + offset, size);
81 kunmap(page);
82
83 desc->count = count - size;
84 desc->written += size;
85 desc->arg.buf += size;
86 return size;
87}
88
89int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
90 char *buf, loff_t *pos, unsigned size)
91{
92 struct inode *inode = ip->i_vnode;
93 read_descriptor_t desc;
94 desc.written = 0;
95 desc.arg.buf = buf;
96 desc.count = size;
97 desc.error = 0;
98 do_generic_mapping_read(inode->i_mapping, ra_state, NULL, pos, &desc, gfs2_read_actor);
99 return desc.written ? desc.written : desc.error;
100}
74 101
75/** 102/**
76 * gfs2_llseek - seek to a location in a file 103 * gfs2_llseek - seek to a location in a file
@@ -105,247 +132,114 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
105 return error; 132 return error;
106} 133}
107 134
108static inline unsigned int vma2state(struct vm_area_struct *vma)
109{
110 if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
111 (VM_MAYWRITE | VM_MAYSHARE))
112 return LM_ST_EXCLUSIVE;
113 return LM_ST_SHARED;
114}
115 135
116static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size, 136static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
117 loff_t *offset, do_rw_t operation) 137 loff_t offset, unsigned long nr_segs)
118{ 138{
119 struct gfs2_holder *ghs; 139 struct file *file = iocb->ki_filp;
120 unsigned int num_gh = 0; 140 struct address_space *mapping = file->f_mapping;
121 ssize_t count; 141 ssize_t retval;
122 struct super_block *sb = file->f_dentry->d_inode->i_sb;
123 struct mm_struct *mm = current->mm;
124 struct vm_area_struct *vma;
125 unsigned long start = (unsigned long)buf;
126 unsigned long end = start + size;
127 int dumping = (current->flags & PF_DUMPCORE);
128 unsigned int x = 0;
129
130 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
131 if (end <= vma->vm_start)
132 break;
133 if (vma->vm_file &&
134 vma->vm_file->f_dentry->d_inode->i_sb == sb) {
135 num_gh++;
136 }
137 }
138
139 ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
140 if (!ghs) {
141 if (!dumping)
142 up_read(&mm->mmap_sem);
143 return -ENOMEM;
144 }
145 142
146 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { 143 retval = filemap_write_and_wait(mapping);
147 if (end <= vma->vm_start) 144 if (retval == 0) {
148 break; 145 retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
149 if (vma->vm_file) { 146 nr_segs);
150 struct inode *inode = vma->vm_file->f_dentry->d_inode;
151 if (inode->i_sb == sb)
152 gfs2_holder_init(get_v2ip(inode)->i_gl,
153 vma2state(vma), 0, &ghs[x++]);
154 }
155 } 147 }
156 148 return retval;
157 if (!dumping)
158 up_read(&mm->mmap_sem);
159
160 gfs2_assert(get_v2sdp(sb), x == num_gh);
161
162 count = operation(file, buf, size, offset, num_gh, ghs);
163
164 while (num_gh--)
165 gfs2_holder_uninit(&ghs[num_gh]);
166 kfree(ghs);
167
168 return count;
169} 149}
170 150
171/** 151/**
172 * walk_vm - Walk the vmas associated with a buffer for read or write. 152 * __gfs2_file_aio_read - The main GFS2 read function
173 * If any of them are gfs2, pass the gfs2 inode down to the read/write 153 *
174 * worker function so that locks can be acquired in the correct order. 154 * N.B. This is almost, but not quite the same as __generic_file_aio_read()
175 * @file: The file to read/write from/to 155 * the important subtle different being that inode->i_size isn't valid
176 * @buf: The buffer to copy to/from 156 * unless we are holding a lock, and we do this _only_ on the O_DIRECT
177 * @size: The amount of data requested 157 * path since otherwise locking is done entirely at the page cache
178 * @offset: The current file offset 158 * layer.
179 * @operation: The read or write worker function
180 *
181 * Outputs: Offset - updated according to number of bytes written
182 *
183 * Returns: The number of bytes written, errno on failure
184 */ 159 */
185 160static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
186static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size, 161 const struct iovec *iov,
187 loff_t *offset, do_rw_t operation) 162 unsigned long nr_segs, loff_t *ppos)
188{ 163{
164 struct file *filp = iocb->ki_filp;
165 struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
189 struct gfs2_holder gh; 166 struct gfs2_holder gh;
190 167 ssize_t retval;
191 if (current->mm) { 168 unsigned long seg;
192 struct super_block *sb = file->f_dentry->d_inode->i_sb; 169 size_t count;
193 struct mm_struct *mm = current->mm; 170
194 struct vm_area_struct *vma; 171 count = 0;
195 unsigned long start = (unsigned long)buf; 172 for (seg = 0; seg < nr_segs; seg++) {
196 unsigned long end = start + size; 173 const struct iovec *iv = &iov[seg];
197 int dumping = (current->flags & PF_DUMPCORE); 174
198 175 /*
199 if (!dumping) 176 * If any segment has a negative length, or the cumulative
200 down_read(&mm->mmap_sem); 177 * length ever wraps negative then return -EINVAL.
201 178 */
202 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { 179 count += iv->iov_len;
203 if (end <= vma->vm_start) 180 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
204 break;
205 if (vma->vm_file &&
206 vma->vm_file->f_dentry->d_inode->i_sb == sb)
207 goto do_locks;
208 }
209
210 if (!dumping)
211 up_read(&mm->mmap_sem);
212 }
213
214 return operation(file, buf, size, offset, 0, &gh);
215
216do_locks:
217 return walk_vm_hard(file, buf, size, offset, operation);
218}
219
220static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
221 loff_t *offset)
222{
223 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
224 ssize_t count = 0;
225
226 if (*offset < 0)
227 return -EINVAL; 181 return -EINVAL;
228 if (!access_ok(VERIFY_WRITE, buf, size)) 182 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
183 continue;
184 if (seg == 0)
229 return -EFAULT; 185 return -EFAULT;
186 nr_segs = seg;
187 count -= iv->iov_len; /* This segment is no good */
188 break;
189 }
190
191 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
192 if (filp->f_flags & O_DIRECT) {
193 loff_t pos = *ppos, size;
194 struct address_space *mapping;
195 struct inode *inode;
196
197 mapping = filp->f_mapping;
198 inode = mapping->host;
199 retval = 0;
200 if (!count)
201 goto out; /* skip atime */
202
203 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
204 retval = gfs2_glock_nq_m_atime(1, &gh);
205 if (retval)
206 goto out;
230 207
231 if (!(file->f_flags & O_LARGEFILE)) { 208 size = i_size_read(inode);
232 if (*offset >= MAX_NON_LFS) 209 if (pos < size) {
233 return -EFBIG; 210 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
234 if (*offset + size > MAX_NON_LFS) 211 if (retval > 0 && !is_sync_kiocb(iocb))
235 size = MAX_NON_LFS - *offset; 212 retval = -EIOCBQUEUED;
236 } 213 if (retval > 0)
237 214 *ppos = pos + retval;
238 count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
239
240 if (count > 0)
241 *offset += count;
242
243 return count;
244}
245
246/**
247 * do_read_direct - Read bytes from a file
248 * @file: The file to read from
249 * @buf: The buffer to copy into
250 * @size: The amount of data requested
251 * @offset: The current file offset
252 * @num_gh: The number of other locks we need to do the read
253 * @ghs: the locks we need plus one for our lock
254 *
255 * Outputs: Offset - updated according to number of bytes read
256 *
257 * Returns: The number of bytes read, errno on failure
258 */
259
260static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
261 loff_t *offset, unsigned int num_gh,
262 struct gfs2_holder *ghs)
263{
264 struct inode *inode = file->f_mapping->host;
265 struct gfs2_inode *ip = get_v2ip(inode);
266 unsigned int state = LM_ST_DEFERRED;
267 int flags = 0;
268 unsigned int x;
269 ssize_t count = 0;
270 int error;
271
272 for (x = 0; x < num_gh; x++)
273 if (ghs[x].gh_gl == ip->i_gl) {
274 state = LM_ST_SHARED;
275 flags |= GL_LOCAL_EXCL;
276 break;
277 } 215 }
278 216 file_accessed(filp);
279 gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); 217 gfs2_glock_dq_m(1, &gh);
280 218 gfs2_holder_uninit(&gh);
281 error = gfs2_glock_nq_m(num_gh + 1, ghs);
282 if (error)
283 goto out; 219 goto out;
220 }
284 221
285 error = -EINVAL; 222 retval = 0;
286 if (gfs2_is_jdata(ip)) 223 if (count) {
287 goto out_gunlock; 224 for (seg = 0; seg < nr_segs; seg++) {
288 225 read_descriptor_t desc;
289 if (gfs2_is_stuffed(ip)) { 226
290 size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; 227 desc.written = 0;
291 228 desc.arg.buf = iov[seg].iov_base;
292 if (((*offset) & mask) || (((unsigned long)buf) & mask)) 229 desc.count = iov[seg].iov_len;
293 goto out_gunlock; 230 if (desc.count == 0)
294 231 continue;
295 count = do_jdata_read(file, buf, size & ~mask, offset); 232 desc.error = 0;
296 } else 233 do_generic_file_read(filp,ppos,&desc,file_read_actor);
297 count = generic_file_read(file, buf, size, offset); 234 retval += desc.written;
298 235 if (desc.error) {
299 error = 0; 236 retval = retval ?: desc.error;
300 237 break;
301 out_gunlock: 238 }
302 gfs2_glock_dq_m(num_gh + 1, ghs); 239 }
303 240 }
304 out: 241out:
305 gfs2_holder_uninit(&ghs[num_gh]); 242 return retval;
306
307 return (count) ? count : error;
308}
309
310/**
311 * do_read_buf - Read bytes from a file
312 * @file: The file to read from
313 * @buf: The buffer to copy into
314 * @size: The amount of data requested
315 * @offset: The current file offset
316 * @num_gh: The number of other locks we need to do the read
317 * @ghs: the locks we need plus one for our lock
318 *
319 * Outputs: Offset - updated according to number of bytes read
320 *
321 * Returns: The number of bytes read, errno on failure
322 */
323
324static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
325 loff_t *offset, unsigned int num_gh,
326 struct gfs2_holder *ghs)
327{
328 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
329 ssize_t count = 0;
330 int error;
331
332 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
333
334 error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
335 if (error)
336 goto out;
337
338 if (gfs2_is_jdata(ip))
339 count = do_jdata_read(file, buf, size, offset);
340 else
341 count = generic_file_read(file, buf, size, offset);
342
343 gfs2_glock_dq_m(num_gh + 1, ghs);
344
345 out:
346 gfs2_holder_uninit(&ghs[num_gh]);
347
348 return (count) ? count : error;
349} 243}
350 244
351/** 245/**
@@ -360,550 +254,49 @@ static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
360 * Returns: The number of bytes read, errno on failure 254 * Returns: The number of bytes read, errno on failure
361 */ 255 */
362 256
363static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size, 257static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
364 loff_t *offset) 258 loff_t *offset)
365{ 259{
366 atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
367
368 if (file->f_flags & O_DIRECT)
369 return walk_vm(file, buf, size, offset, do_read_direct);
370 else
371 return walk_vm(file, buf, size, offset, do_read_buf);
372}
373
374/**
375 * grope_mapping - feel up a mapping that needs to be written
376 * @buf: the start of the memory to be written
377 * @size: the size of the memory to be written
378 *
379 * We do this after acquiring the locks on the mapping,
380 * but before starting the write transaction. We need to make
381 * sure that we don't cause recursive transactions if blocks
382 * need to be allocated to the file backing the mapping.
383 *
384 * Returns: errno
385 */
386
387static int grope_mapping(const char __user *buf, size_t size)
388{
389 const char __user *stop = buf + size;
390 char c;
391
392 while (buf < stop) {
393 if (copy_from_user(&c, buf, 1))
394 return -EFAULT;
395 buf += PAGE_CACHE_SIZE;
396 buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
397 }
398
399 return 0;
400}
401
402/**
403 * do_write_direct_alloc - Write bytes to a file
404 * @file: The file to write to
405 * @buf: The buffer to copy from
406 * @size: The amount of data requested
407 * @offset: The current file offset
408 *
409 * Outputs: Offset - updated according to number of bytes written
410 *
411 * Returns: The number of bytes written, errno on failure
412 */
413
414static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
415 loff_t *offset)
416{
417 struct inode *inode = file->f_mapping->host;
418 struct gfs2_inode *ip = get_v2ip(inode);
419 struct gfs2_sbd *sdp = ip->i_sbd;
420 struct gfs2_alloc *al = NULL;
421 struct iovec local_iov = { .iov_base = buf, .iov_len = size }; 260 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
422 struct buffer_head *dibh; 261 struct kiocb kiocb;
423 unsigned int data_blocks, ind_blocks; 262 ssize_t ret;
424 ssize_t count;
425 int error;
426
427 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
428
429 al = gfs2_alloc_get(ip);
430
431 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
432 if (error)
433 goto fail;
434
435 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
436 if (error)
437 goto fail_gunlock_q;
438
439 al->al_requested = data_blocks + ind_blocks;
440
441 error = gfs2_inplace_reserve(ip);
442 if (error)
443 goto fail_gunlock_q;
444
445 error = gfs2_trans_begin(sdp,
446 al->al_rgd->rd_ri.ri_length + ind_blocks +
447 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
448 if (error)
449 goto fail_ipres;
450
451 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
452 error = gfs2_meta_inode_buffer(ip, &dibh);
453 if (error)
454 goto fail_end_trans;
455
456 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
457 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
458
459 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
460 gfs2_dinode_out(&ip->i_di, dibh->b_data);
461 brelse(dibh);
462 }
463
464 if (gfs2_is_stuffed(ip)) {
465 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
466 if (error)
467 goto fail_end_trans;
468 }
469
470 count = generic_file_write_nolock(file, &local_iov, 1, offset);
471 if (count < 0) {
472 error = count;
473 goto fail_end_trans;
474 }
475
476 error = gfs2_meta_inode_buffer(ip, &dibh);
477 if (error)
478 goto fail_end_trans;
479
480 if (ip->i_di.di_size < inode->i_size)
481 ip->i_di.di_size = inode->i_size;
482 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
483
484 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
485 gfs2_dinode_out(&ip->i_di, dibh->b_data);
486 brelse(dibh);
487
488 gfs2_trans_end(sdp);
489 263
490 if (file->f_flags & O_SYNC) 264 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
491 gfs2_log_flush_glock(ip->i_gl);
492
493 gfs2_inplace_release(ip);
494 gfs2_quota_unlock(ip);
495 gfs2_alloc_put(ip);
496
497 if (file->f_mapping->nrpages) {
498 error = filemap_fdatawrite(file->f_mapping);
499 if (!error)
500 error = filemap_fdatawait(file->f_mapping);
501 }
502 if (error)
503 return error;
504
505 return count;
506
507 fail_end_trans:
508 gfs2_trans_end(sdp);
509
510 fail_ipres:
511 gfs2_inplace_release(ip);
512
513 fail_gunlock_q:
514 gfs2_quota_unlock(ip);
515
516 fail:
517 gfs2_alloc_put(ip);
518 265
519 return error; 266 init_sync_kiocb(&kiocb, filp);
520} 267 ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
521 268 if (-EIOCBQUEUED == ret)
522/** 269 ret = wait_on_sync_kiocb(&kiocb);
523 * do_write_direct - Write bytes to a file 270 return ret;
524 * @file: The file to write to
525 * @buf: The buffer to copy from
526 * @size: The amount of data requested
527 * @offset: The current file offset
528 * @num_gh: The number of other locks we need to do the read
529 * @gh: the locks we need plus one for our lock
530 *
531 * Outputs: Offset - updated according to number of bytes written
532 *
533 * Returns: The number of bytes written, errno on failure
534 */
535
536static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
537 loff_t *offset, unsigned int num_gh,
538 struct gfs2_holder *ghs)
539{
540 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
541 struct gfs2_sbd *sdp = ip->i_sbd;
542 struct gfs2_file *fp = get_v2fp(file);
543 unsigned int state = LM_ST_DEFERRED;
544 int alloc_required;
545 unsigned int x;
546 size_t s;
547 ssize_t count = 0;
548 int error;
549
550 if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
551 state = LM_ST_EXCLUSIVE;
552 else
553 for (x = 0; x < num_gh; x++)
554 if (ghs[x].gh_gl == ip->i_gl) {
555 state = LM_ST_EXCLUSIVE;
556 break;
557 }
558
559 restart:
560 gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
561
562 error = gfs2_glock_nq_m(num_gh + 1, ghs);
563 if (error)
564 goto out;
565
566 error = -EINVAL;
567 if (gfs2_is_jdata(ip))
568 goto out_gunlock;
569
570 if (num_gh) {
571 error = grope_mapping(buf, size);
572 if (error)
573 goto out_gunlock;
574 }
575
576 if (file->f_flags & O_APPEND)
577 *offset = ip->i_di.di_size;
578
579 if (!(file->f_flags & O_LARGEFILE)) {
580 error = -EFBIG;
581 if (*offset >= MAX_NON_LFS)
582 goto out_gunlock;
583 if (*offset + size > MAX_NON_LFS)
584 size = MAX_NON_LFS - *offset;
585 }
586
587 if (gfs2_is_stuffed(ip) ||
588 *offset + size > ip->i_di.di_size ||
589 ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
590 alloc_required = 1;
591 else {
592 error = gfs2_write_alloc_required(ip, *offset, size,
593 &alloc_required);
594 if (error)
595 goto out_gunlock;
596 }
597
598 if (alloc_required && state != LM_ST_EXCLUSIVE) {
599 gfs2_glock_dq_m(num_gh + 1, ghs);
600 gfs2_holder_uninit(&ghs[num_gh]);
601 state = LM_ST_EXCLUSIVE;
602 goto restart;
603 }
604
605 if (alloc_required) {
606 set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
607
608 /* split large writes into smaller atomic transactions */
609 while (size) {
610 s = gfs2_tune_get(sdp, gt_max_atomic_write);
611 if (s > size)
612 s = size;
613
614 error = do_write_direct_alloc(file, buf, s, offset);
615 if (error < 0)
616 goto out_gunlock;
617
618 buf += error;
619 size -= error;
620 count += error;
621 }
622 } else {
623 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
624 struct gfs2_holder t_gh;
625
626 clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
627
628 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
629 GL_NEVER_RECURSE, &t_gh);
630 if (error)
631 goto out_gunlock;
632
633 count = generic_file_write_nolock(file, &local_iov, 1, offset);
634
635 gfs2_glock_dq_uninit(&t_gh);
636 }
637
638 error = 0;
639
640 out_gunlock:
641 gfs2_glock_dq_m(num_gh + 1, ghs);
642
643 out:
644 gfs2_holder_uninit(&ghs[num_gh]);
645
646 return (count) ? count : error;
647} 271}
648 272
649/** 273static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
650 * do_do_write_buf - Write bytes to a file 274 unsigned long nr_segs, loff_t *ppos)
651 * @file: The file to write to
652 * @buf: The buffer to copy from
653 * @size: The amount of data requested
654 * @offset: The current file offset
655 *
656 * Outputs: Offset - updated according to number of bytes written
657 *
658 * Returns: The number of bytes written, errno on failure
659 */
660
661static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
662 loff_t *offset)
663{ 275{
664 struct inode *inode = file->f_mapping->host; 276 struct kiocb kiocb;
665 struct gfs2_inode *ip = get_v2ip(inode); 277 ssize_t ret;
666 struct gfs2_sbd *sdp = ip->i_sbd;
667 struct gfs2_alloc *al = NULL;
668 struct buffer_head *dibh;
669 unsigned int data_blocks, ind_blocks;
670 int alloc_required, journaled;
671 ssize_t count;
672 int error;
673
674 journaled = gfs2_is_jdata(ip);
675
676 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
677
678 error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
679 if (error)
680 return error;
681
682 if (alloc_required) {
683 al = gfs2_alloc_get(ip);
684
685 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
686 if (error)
687 goto fail;
688
689 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
690 if (error)
691 goto fail_gunlock_q;
692
693 al->al_requested = data_blocks + ind_blocks;
694
695 error = gfs2_inplace_reserve(ip);
696 if (error)
697 goto fail_gunlock_q;
698
699 error = gfs2_trans_begin(sdp,
700 al->al_rgd->rd_ri.ri_length +
701 ind_blocks +
702 ((journaled) ? data_blocks : 0) +
703 RES_DINODE + RES_STATFS + RES_QUOTA,
704 0);
705 if (error)
706 goto fail_ipres;
707 } else {
708 error = gfs2_trans_begin(sdp,
709 ((journaled) ? data_blocks : 0) +
710 RES_DINODE,
711 0);
712 if (error)
713 goto fail_ipres;
714 }
715
716 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
717 error = gfs2_meta_inode_buffer(ip, &dibh);
718 if (error)
719 goto fail_end_trans;
720
721 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
722 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
723
724 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
725 gfs2_dinode_out(&ip->i_di, dibh->b_data);
726 brelse(dibh);
727 }
728 278
729 if (journaled) { 279 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
730 count = gfs2_jdata_write(ip, buf, *offset, size,
731 gfs2_copy_from_user);
732 if (count < 0) {
733 error = count;
734 goto fail_end_trans;
735 }
736
737 *offset += count;
738 } else {
739 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
740
741 count = generic_file_write_nolock(file, &local_iov, 1, offset);
742 if (count < 0) {
743 error = count;
744 goto fail_end_trans;
745 }
746
747 error = gfs2_meta_inode_buffer(ip, &dibh);
748 if (error)
749 goto fail_end_trans;
750
751 if (ip->i_di.di_size < inode->i_size)
752 ip->i_di.di_size = inode->i_size;
753 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
754
755 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
756 gfs2_dinode_out(&ip->i_di, dibh->b_data);
757 brelse(dibh);
758 }
759
760 gfs2_trans_end(sdp);
761
762 if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
763 gfs2_log_flush_glock(ip->i_gl);
764 error = filemap_fdatawrite(file->f_mapping);
765 if (error == 0)
766 error = filemap_fdatawait(file->f_mapping);
767 if (error)
768 goto fail_ipres;
769 }
770
771 if (alloc_required) {
772 gfs2_assert_warn(sdp, count != size ||
773 al->al_alloced);
774 gfs2_inplace_release(ip);
775 gfs2_quota_unlock(ip);
776 gfs2_alloc_put(ip);
777 }
778
779 return count;
780
781 fail_end_trans:
782 gfs2_trans_end(sdp);
783
784 fail_ipres:
785 if (alloc_required)
786 gfs2_inplace_release(ip);
787
788 fail_gunlock_q:
789 if (alloc_required)
790 gfs2_quota_unlock(ip);
791 280
792 fail: 281 init_sync_kiocb(&kiocb, filp);
793 if (alloc_required) 282 ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
794 gfs2_alloc_put(ip); 283 if (-EIOCBQUEUED == ret)
795 284 ret = wait_on_sync_kiocb(&kiocb);
796 return error; 285 return ret;
797} 286}
798 287
799/** 288static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
800 * do_write_buf - Write bytes to a file 289 size_t count, loff_t pos)
801 * @file: The file to write to
802 * @buf: The buffer to copy from
803 * @size: The amount of data requested
804 * @offset: The current file offset
805 * @num_gh: The number of other locks we need to do the read
806 * @gh: the locks we need plus one for our lock
807 *
808 * Outputs: Offset - updated according to number of bytes written
809 *
810 * Returns: The number of bytes written, errno on failure
811 */
812
813static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
814 loff_t *offset, unsigned int num_gh,
815 struct gfs2_holder *ghs)
816{ 290{
817 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); 291 struct file *filp = iocb->ki_filp;
818 struct gfs2_sbd *sdp = ip->i_sbd; 292 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
819 size_t s;
820 ssize_t count = 0;
821 int error;
822
823 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
824
825 error = gfs2_glock_nq_m(num_gh + 1, ghs);
826 if (error)
827 goto out;
828
829 if (num_gh) {
830 error = grope_mapping(buf, size);
831 if (error)
832 goto out_gunlock;
833 }
834
835 if (file->f_flags & O_APPEND)
836 *offset = ip->i_di.di_size;
837
838 if (!(file->f_flags & O_LARGEFILE)) {
839 error = -EFBIG;
840 if (*offset >= MAX_NON_LFS)
841 goto out_gunlock;
842 if (*offset + size > MAX_NON_LFS)
843 size = MAX_NON_LFS - *offset;
844 }
845
846 /* split large writes into smaller atomic transactions */
847 while (size) {
848 s = gfs2_tune_get(sdp, gt_max_atomic_write);
849 if (s > size)
850 s = size;
851
852 error = do_do_write_buf(file, buf, s, offset);
853 if (error < 0)
854 goto out_gunlock;
855
856 buf += error;
857 size -= error;
858 count += error;
859 }
860
861 error = 0;
862 293
863 out_gunlock: 294 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
864 gfs2_glock_dq_m(num_gh + 1, ghs);
865 295
866 out: 296 BUG_ON(iocb->ki_pos != pos);
867 gfs2_holder_uninit(&ghs[num_gh]); 297 return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
868
869 return (count) ? count : error;
870} 298}
871 299
872/**
873 * gfs2_write - Write bytes to a file
874 * @file: The file to write to
875 * @buf: The buffer to copy from
876 * @size: The amount of data requested
877 * @offset: The current file offset
878 *
879 * Outputs: Offset - updated according to number of bytes written
880 *
881 * Returns: The number of bytes written, errno on failure
882 */
883
884static ssize_t gfs2_write(struct file *file, const char __user *buf,
885 size_t size, loff_t *offset)
886{
887 struct inode *inode = file->f_mapping->host;
888 ssize_t count;
889
890 atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
891
892 if (*offset < 0)
893 return -EINVAL;
894 if (!access_ok(VERIFY_READ, buf, size))
895 return -EFAULT;
896
897 mutex_lock(&inode->i_mutex);
898 if (file->f_flags & O_DIRECT)
899 count = walk_vm(file, buf, size, offset,
900 do_write_direct);
901 else
902 count = walk_vm(file, buf, size, offset, do_write_buf);
903 mutex_unlock(&inode->i_mutex);
904
905 return count;
906}
907 300
908/** 301/**
909 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read() 302 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
@@ -1158,9 +551,6 @@ static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd, unsigned lo
1158 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) { 551 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
1159 if (!S_ISREG(ip->i_di.di_mode)) 552 if (!S_ISREG(ip->i_di.di_mode))
1160 goto out; 553 goto out;
1161 /* FIXME: Would be nice not to require the following test */
1162 if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
1163 goto out;
1164 } 554 }
1165 if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) { 555 if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
1166 if (!S_ISDIR(ip->i_di.di_mode)) 556 if (!S_ISDIR(ip->i_di.di_mode))
@@ -1246,21 +636,14 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
1246 return error; 636 return error;
1247 } 637 }
1248 638
1249 if (gfs2_is_jdata(ip)) { 639 /* This is VM_MAYWRITE instead of VM_WRITE because a call
1250 if (vma->vm_flags & VM_MAYSHARE) 640 to mprotect() can turn on VM_WRITE later. */
1251 error = -EOPNOTSUPP; 641
1252 else 642 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
1253 vma->vm_ops = &gfs2_vm_ops_private; 643 (VM_MAYSHARE | VM_MAYWRITE))
1254 } else { 644 vma->vm_ops = &gfs2_vm_ops_sharewrite;
1255 /* This is VM_MAYWRITE instead of VM_WRITE because a call 645 else
1256 to mprotect() can turn on VM_WRITE later. */ 646 vma->vm_ops = &gfs2_vm_ops_private;
1257
1258 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
1259 (VM_MAYSHARE | VM_MAYWRITE))
1260 vma->vm_ops = &gfs2_vm_ops_sharewrite;
1261 else
1262 vma->vm_ops = &gfs2_vm_ops_private;
1263 }
1264 647
1265 gfs2_glock_dq_uninit(&i_gh); 648 gfs2_glock_dq_uninit(&i_gh);
1266 649
@@ -1313,13 +696,6 @@ static int gfs2_open(struct inode *inode, struct file *file)
1313 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) 696 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
1314 file->f_flags |= O_DIRECT; 697 file->f_flags |= O_DIRECT;
1315 698
1316 /* Don't let the user open O_DIRECT on a jdata file */
1317
1318 if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
1319 error = -EINVAL;
1320 goto fail_gunlock;
1321 }
1322
1323 gfs2_glock_dq_uninit(&i_gh); 699 gfs2_glock_dq_uninit(&i_gh);
1324 } 700 }
1325 701
@@ -1446,29 +822,10 @@ static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
1446 read_actor_t actor, void *target) 822 read_actor_t actor, void *target)
1447{ 823{
1448 struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host); 824 struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
1449 struct gfs2_holder gh;
1450 ssize_t retval;
1451 825
1452 atomic_inc(&ip->i_sbd->sd_ops_file); 826 atomic_inc(&ip->i_sbd->sd_ops_file);
1453 827
1454 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 828 return generic_file_sendfile(in_file, offset, count, actor, target);
1455
1456 retval = gfs2_glock_nq_atime(&gh);
1457 if (retval)
1458 goto out;
1459
1460 if (gfs2_is_jdata(ip))
1461 retval = -EOPNOTSUPP;
1462 else
1463 retval = generic_file_sendfile(in_file, offset, count, actor,
1464 target);
1465
1466 gfs2_glock_dq(&gh);
1467
1468 out:
1469 gfs2_holder_uninit(&gh);
1470
1471 return retval;
1472} 829}
1473 830
1474static int do_flock(struct file *file, int cmd, struct file_lock *fl) 831static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -1567,7 +924,11 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1567struct file_operations gfs2_file_fops = { 924struct file_operations gfs2_file_fops = {
1568 .llseek = gfs2_llseek, 925 .llseek = gfs2_llseek,
1569 .read = gfs2_read, 926 .read = gfs2_read,
1570 .write = gfs2_write, 927 .readv = gfs2_file_readv,
928 .aio_read = gfs2_file_aio_read,
929 .write = generic_file_write,
930 .writev = generic_file_writev,
931 .aio_write = generic_file_aio_write,
1571 .ioctl = gfs2_ioctl, 932 .ioctl = gfs2_ioctl,
1572 .mmap = gfs2_mmap, 933 .mmap = gfs2_mmap,
1573 .open = gfs2_open, 934 .open = gfs2_open,
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index a1b409ce75e1..8f77bb7896bd 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -155,9 +155,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
155 if (error) 155 if (error)
156 return NULL; 156 return NULL;
157 157
158 if (gfs2_is_jdata(ip))
159 goto out;
160
161 set_bit(GIF_PAGED, &ip->i_flags); 158 set_bit(GIF_PAGED, &ip->i_flags);
162 set_bit(GIF_SW_PAGED, &ip->i_flags); 159 set_bit(GIF_SW_PAGED, &ip->i_flags);
163 160
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
index ea31bceac4f2..3542aa6b01c4 100644
--- a/fs/gfs2/page.c
+++ b/fs/gfs2/page.c
@@ -172,8 +172,8 @@ int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
172 map_bh(bh, inode->i_sb, block); 172 map_bh(bh, inode->i_sb, block);
173 173
174 set_buffer_uptodate(bh); 174 set_buffer_uptodate(bh);
175 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) 175 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
176 gfs2_trans_add_databuf(sdp, bh); 176 gfs2_trans_add_bh(ip->i_gl, bh, 0);
177 mark_buffer_dirty(bh); 177 mark_buffer_dirty(bh);
178 178
179 if (release) { 179 if (release) {
@@ -245,8 +245,8 @@ int gfs2_block_truncate_page(struct address_space *mapping)
245 goto unlock; 245 goto unlock;
246 } 246 }
247 247
248 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED/* || gfs2_is_jdata(ip)*/) 248 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
249 gfs2_trans_add_databuf(sdp, bh); 249 gfs2_trans_add_bh(ip->i_gl, bh, 0);
250 250
251 kaddr = kmap_atomic(page, KM_USER0); 251 kaddr = kmap_atomic(page, KM_USER0);
252 memset(kaddr + offset, 0, length); 252 memset(kaddr + offset, 0, length);
@@ -273,7 +273,7 @@ void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
273 end = start + bsize; 273 end = start + bsize;
274 if (end <= from || start >= to) 274 if (end <= from || start >= to)
275 continue; 275 continue;
276 gfs2_trans_add_databuf(ip->i_sbd, bh); 276 gfs2_trans_add_bh(ip->i_gl, bh, 0);
277 } 277 }
278} 278}
279 279
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 69e8f4e92e57..138fdf559a9a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -43,20 +43,22 @@
43#include <linux/buffer_head.h> 43#include <linux/buffer_head.h>
44#include <linux/tty.h> 44#include <linux/tty.h>
45#include <linux/sort.h> 45#include <linux/sort.h>
46#include <linux/fs.h>
46#include <asm/semaphore.h> 47#include <asm/semaphore.h>
47 48
48#include "gfs2.h" 49#include "gfs2.h"
49#include "bmap.h" 50#include "bmap.h"
50#include "glock.h" 51#include "glock.h"
51#include "glops.h" 52#include "glops.h"
52#include "jdata.h"
53#include "log.h" 53#include "log.h"
54#include "meta_io.h" 54#include "meta_io.h"
55#include "quota.h" 55#include "quota.h"
56#include "rgrp.h" 56#include "rgrp.h"
57#include "super.h" 57#include "super.h"
58#include "trans.h" 58#include "trans.h"
59#include "inode.h"
59#include "ops_file.h" 60#include "ops_file.h"
61#include "ops_address.h"
60 62
61#define QUOTA_USER 1 63#define QUOTA_USER 1
62#define QUOTA_GROUP 0 64#define QUOTA_GROUP 0
@@ -561,6 +563,81 @@ static void do_qc(struct gfs2_quota_data *qd, int64_t change)
561 up(&sdp->sd_quota_mutex); 563 up(&sdp->sd_quota_mutex);
562} 564}
563 565
566/**
567 * gfs2_adjust_quota
568 *
569 * This function was mostly borrowed from gfs2_block_truncate_page which was
570 * in turn mostly borrowed from ext3
571 */
572static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
573 int64_t change, struct gfs2_quota_data *qd)
574{
575 struct inode *inode = gfs2_ip2v(ip);
576 struct address_space *mapping = inode->i_mapping;
577 unsigned long index = loc >> PAGE_CACHE_SHIFT;
578 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
579 unsigned blocksize, iblock, pos;
580 struct buffer_head *bh;
581 struct page *page;
582 void *kaddr;
583 __be64 *ptr;
584 u64 value;
585 int err = -EIO;
586
587 page = grab_cache_page(mapping, index);
588 if (!page)
589 return -ENOMEM;
590
591 blocksize = inode->i_sb->s_blocksize;
592 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
593
594 if (!page_has_buffers(page))
595 create_empty_buffers(page, blocksize, 0);
596
597 bh = page_buffers(page);
598 pos = blocksize;
599 while (offset >= pos) {
600 bh = bh->b_this_page;
601 iblock++;
602 pos += blocksize;
603 }
604
605 if (!buffer_mapped(bh)) {
606 gfs2_get_block(inode, iblock, bh, 1);
607 if (!buffer_mapped(bh))
608 goto unlock;
609 }
610
611 if (PageUptodate(page))
612 set_buffer_uptodate(bh);
613
614 if (!buffer_uptodate(bh)) {
615 ll_rw_block(READ, 1, &bh);
616 wait_on_buffer(bh);
617 if (!buffer_uptodate(bh))
618 goto unlock;
619 }
620
621 gfs2_trans_add_bh(ip->i_gl, bh, 0);
622
623 kaddr = kmap_atomic(page, KM_USER0);
624 ptr = (__be64 *)(kaddr + offset);
625 value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change);
626 flush_dcache_page(page);
627 kunmap_atomic(kaddr, KM_USER0);
628 err = 0;
629 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
630#if 0
631 qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
632 qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
633#endif
634 qd->qd_qb.qb_value = cpu_to_be64(value);
635unlock:
636 unlock_page(page);
637 page_cache_release(page);
638 return err;
639}
640
564static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) 641static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
565{ 642{
566 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; 643 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
@@ -635,43 +712,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
635 712
636 file_ra_state_init(&ra_state, ip->i_vnode->i_mapping); 713 file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
637 for (x = 0; x < num_qd; x++) { 714 for (x = 0; x < num_qd; x++) {
638 char buf[sizeof(struct gfs2_quota)];
639 struct gfs2_quota q;
640
641 qd = qda[x]; 715 qd = qda[x];
642 offset = qd2offset(qd); 716 offset = qd2offset(qd);
643 717 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
644 /* The quota file may not be a multiple of 718 (struct gfs2_quota_data *)qd->qd_gl->gl_lvb);
645 sizeof(struct gfs2_quota) bytes. */ 719 if (error)
646 memset(buf, 0, sizeof(struct gfs2_quota));
647
648 error = gfs2_internal_read(ip, &ra_state, buf, &offset,
649 sizeof(struct gfs2_quota));
650 if (error < 0)
651 goto out_end_trans; 720 goto out_end_trans;
652 721
653 gfs2_quota_in(&q, buf);
654 q.qu_value += qda[x]->qd_change_sync;
655 gfs2_quota_out(&q, buf);
656
657 error = gfs2_jdata_write_mem(ip, buf, offset,
658 sizeof(struct gfs2_quota));
659 if (error < 0)
660 goto out_end_trans;
661 else if (error != sizeof(struct gfs2_quota)) {
662 error = -EIO;
663 goto out_end_trans;
664 }
665
666 do_qc(qd, -qd->qd_change_sync); 722 do_qc(qd, -qd->qd_change_sync);
667
668 memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
669 qd->qd_qb.qb_magic = GFS2_MAGIC;
670 qd->qd_qb.qb_limit = q.qu_limit;
671 qd->qd_qb.qb_warn = q.qu_warn;
672 qd->qd_qb.qb_value = q.qu_value;
673
674 gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
675 } 723 }
676 724
677 error = 0; 725 error = 0;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index b014591fa4a4..104e664fa182 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -154,14 +154,13 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
154 gfs2_attach_bufdata(gl, bh, meta); 154 gfs2_attach_bufdata(gl, bh, meta);
155 bd = get_v2bd(bh); 155 bd = get_v2bd(bh);
156 } 156 }
157
158 lops_add(sdp, &bd->bd_le); 157 lops_add(sdp, &bd->bd_le);
159} 158}
160 159
161void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno) 160void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
162{ 161{
163 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), 162 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
164 GFP_KERNEL | __GFP_NOFAIL); 163 GFP_NOFS | __GFP_NOFAIL);
165 lops_init_le(&rv->rv_le, &gfs2_revoke_lops); 164 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
166 rv->rv_blkno = blkno; 165 rv->rv_blkno = blkno;
167 lops_add(sdp, &rv->rv_le); 166 lops_add(sdp, &rv->rv_le);
@@ -197,19 +196,3 @@ void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
197 lops_add(rgd->rd_sbd, &rgd->rd_le); 196 lops_add(rgd->rd_sbd, &rgd->rd_le);
198} 197}
199 198
200void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
201{
202 struct gfs2_bufdata *bd;
203
204 bd = get_v2bd(bh);
205 if (!bd) {
206 bd = kmalloc(sizeof(struct gfs2_bufdata),
207 GFP_NOFS | __GFP_NOFAIL);
208 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
209 get_bh(bh);
210 bd->bd_bh = bh;
211 set_v2bd(bh, bd);
212 lops_add(sdp, &bd->bd_le);
213 }
214}
215
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 5a7da1e853c9..f7f3e2a3d590 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -35,6 +35,5 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno); 35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno); 36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); 37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
39 38
40#endif /* __TRANS_DOT_H__ */ 39#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index ad49153c33d1..4fb1704aac10 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -50,6 +50,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
50 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 50 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
51 sdp->sd_fsname, assertion, 51 sdp->sd_fsname, assertion,
52 sdp->sd_fsname, function, file, line); 52 sdp->sd_fsname, function, file, line);
53 dump_stack();
53 return (me) ? -1 : -2; 54 return (me) ? -1 : -2;
54} 55}
55 56
@@ -75,6 +76,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
75 76
76 if (sdp->sd_args.ar_debug) 77 if (sdp->sd_args.ar_debug)
77 BUG(); 78 BUG();
79 else
80 dump_stack();
78 81
79 sdp->sd_last_warning = jiffies; 82 sdp->sd_last_warning = jiffies;
80 83