aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2/ops_file.c
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2006-02-08 06:50:51 -0500
committerSteven Whitehouse <swhiteho@redhat.com>2006-02-08 06:50:51 -0500
commit18ec7d5c3f434aed9661ed10a9e1f48cdeb4981d (patch)
treea7161a4c4b3592052e6772e1c23849de16cac649 /fs/gfs2/ops_file.c
parent257f9b4e97e9a6cceeb247cead92119a4396d37b (diff)
[GFS2] Make journaled data files identical to normal files on disk
This is a very large patch, with a few still to be resolved issues so you might want to check out the previous head of the tree since this is known to be unstable. Fixes for the various bugs will be forthcoming shortly. This patch removes the special data format which has been used up till now for journaled data files. Directories still retain the old format so that they will remain on disk compatible with earlier releases. As a result you can now do the following with journaled data files: 1) mmap them 2) export them over NFS 3) convert to/from normal files whenever you want to (the zero length restriction is gone) In addition the level at which GFS' locking is done has changed for all files (since they all now use the page cache) such that the locking is done at the page cache level rather than the level of the fs operations. This should mean that things like loopback mounts and other things which touch the page cache directly should now work. Current known issues: 1. There is a lock mode inversion problem related to the resource group hold function which needs to be resolved. 2. Any significant amount of I/O causes an oops with an offset of hex 320 (NULL pointer dereference) which appears to be related to a journaled data buffer appearing on a list where it shouldn't be. 3. Direct I/O writes are disabled for the time being (will reappear later) 4. There is probably a deadlock between the page lock and GFS' locks under certain combinations of mmap and fs operation I/O. 5. Issue relating to ref counting on internally used inodes causes a hang on umount (discovered before this patch, and not fixed by it) 6. One part of the directory metadata is different from GFS1 and will need to be resolved before next release. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/ops_file.c')
-rw-r--r--fs/gfs2/ops_file.c967
1 files changed, 164 insertions, 803 deletions
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 0f356fc4690c..56820b39a993 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -18,6 +18,7 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/gfs2_ioctl.h> 20#include <linux/gfs2_ioctl.h>
21#include <linux/fs.h>
21#include <asm/semaphore.h> 22#include <asm/semaphore.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
@@ -27,7 +28,6 @@
27#include "glock.h" 28#include "glock.h"
28#include "glops.h" 29#include "glops.h"
29#include "inode.h" 30#include "inode.h"
30#include "jdata.h"
31#include "lm.h" 31#include "lm.h"
32#include "log.h" 32#include "log.h"
33#include "meta_io.h" 33#include "meta_io.h"
@@ -67,10 +67,37 @@ struct filldir_reg {
67 void *fdr_opaque; 67 void *fdr_opaque;
68}; 68};
69 69
70typedef ssize_t(*do_rw_t) (struct file *file, 70static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
71 char __user *buf, 71 unsigned long offset, unsigned long size)
72 size_t size, loff_t *offset, 72{
73 unsigned int num_gh, struct gfs2_holder *ghs); 73 char *kaddr;
74 unsigned long count = desc->count;
75
76 if (size > count)
77 size = count;
78
79 kaddr = kmap(page);
80 memcpy(desc->arg.buf, kaddr + offset, size);
81 kunmap(page);
82
83 desc->count = count - size;
84 desc->written += size;
85 desc->arg.buf += size;
86 return size;
87}
88
89int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
90 char *buf, loff_t *pos, unsigned size)
91{
92 struct inode *inode = ip->i_vnode;
93 read_descriptor_t desc;
94 desc.written = 0;
95 desc.arg.buf = buf;
96 desc.count = size;
97 desc.error = 0;
98 do_generic_mapping_read(inode->i_mapping, ra_state, NULL, pos, &desc, gfs2_read_actor);
99 return desc.written ? desc.written : desc.error;
100}
74 101
75/** 102/**
76 * gfs2_llseek - seek to a location in a file 103 * gfs2_llseek - seek to a location in a file
@@ -105,247 +132,114 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
105 return error; 132 return error;
106} 133}
107 134
108static inline unsigned int vma2state(struct vm_area_struct *vma)
109{
110 if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
111 (VM_MAYWRITE | VM_MAYSHARE))
112 return LM_ST_EXCLUSIVE;
113 return LM_ST_SHARED;
114}
115 135
116static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size, 136static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
117 loff_t *offset, do_rw_t operation) 137 loff_t offset, unsigned long nr_segs)
118{ 138{
119 struct gfs2_holder *ghs; 139 struct file *file = iocb->ki_filp;
120 unsigned int num_gh = 0; 140 struct address_space *mapping = file->f_mapping;
121 ssize_t count; 141 ssize_t retval;
122 struct super_block *sb = file->f_dentry->d_inode->i_sb;
123 struct mm_struct *mm = current->mm;
124 struct vm_area_struct *vma;
125 unsigned long start = (unsigned long)buf;
126 unsigned long end = start + size;
127 int dumping = (current->flags & PF_DUMPCORE);
128 unsigned int x = 0;
129
130 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
131 if (end <= vma->vm_start)
132 break;
133 if (vma->vm_file &&
134 vma->vm_file->f_dentry->d_inode->i_sb == sb) {
135 num_gh++;
136 }
137 }
138
139 ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
140 if (!ghs) {
141 if (!dumping)
142 up_read(&mm->mmap_sem);
143 return -ENOMEM;
144 }
145 142
146 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { 143 retval = filemap_write_and_wait(mapping);
147 if (end <= vma->vm_start) 144 if (retval == 0) {
148 break; 145 retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
149 if (vma->vm_file) { 146 nr_segs);
150 struct inode *inode = vma->vm_file->f_dentry->d_inode;
151 if (inode->i_sb == sb)
152 gfs2_holder_init(get_v2ip(inode)->i_gl,
153 vma2state(vma), 0, &ghs[x++]);
154 }
155 } 147 }
156 148 return retval;
157 if (!dumping)
158 up_read(&mm->mmap_sem);
159
160 gfs2_assert(get_v2sdp(sb), x == num_gh);
161
162 count = operation(file, buf, size, offset, num_gh, ghs);
163
164 while (num_gh--)
165 gfs2_holder_uninit(&ghs[num_gh]);
166 kfree(ghs);
167
168 return count;
169} 149}
170 150
171/** 151/**
172 * walk_vm - Walk the vmas associated with a buffer for read or write. 152 * __gfs2_file_aio_read - The main GFS2 read function
173 * If any of them are gfs2, pass the gfs2 inode down to the read/write 153 *
174 * worker function so that locks can be acquired in the correct order. 154 * N.B. This is almost, but not quite the same as __generic_file_aio_read()
175 * @file: The file to read/write from/to 155 * the important subtle different being that inode->i_size isn't valid
176 * @buf: The buffer to copy to/from 156 * unless we are holding a lock, and we do this _only_ on the O_DIRECT
177 * @size: The amount of data requested 157 * path since otherwise locking is done entirely at the page cache
178 * @offset: The current file offset 158 * layer.
179 * @operation: The read or write worker function
180 *
181 * Outputs: Offset - updated according to number of bytes written
182 *
183 * Returns: The number of bytes written, errno on failure
184 */ 159 */
185 160static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
186static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size, 161 const struct iovec *iov,
187 loff_t *offset, do_rw_t operation) 162 unsigned long nr_segs, loff_t *ppos)
188{ 163{
164 struct file *filp = iocb->ki_filp;
165 struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
189 struct gfs2_holder gh; 166 struct gfs2_holder gh;
190 167 ssize_t retval;
191 if (current->mm) { 168 unsigned long seg;
192 struct super_block *sb = file->f_dentry->d_inode->i_sb; 169 size_t count;
193 struct mm_struct *mm = current->mm; 170
194 struct vm_area_struct *vma; 171 count = 0;
195 unsigned long start = (unsigned long)buf; 172 for (seg = 0; seg < nr_segs; seg++) {
196 unsigned long end = start + size; 173 const struct iovec *iv = &iov[seg];
197 int dumping = (current->flags & PF_DUMPCORE); 174
198 175 /*
199 if (!dumping) 176 * If any segment has a negative length, or the cumulative
200 down_read(&mm->mmap_sem); 177 * length ever wraps negative then return -EINVAL.
201 178 */
202 for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { 179 count += iv->iov_len;
203 if (end <= vma->vm_start) 180 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
204 break;
205 if (vma->vm_file &&
206 vma->vm_file->f_dentry->d_inode->i_sb == sb)
207 goto do_locks;
208 }
209
210 if (!dumping)
211 up_read(&mm->mmap_sem);
212 }
213
214 return operation(file, buf, size, offset, 0, &gh);
215
216do_locks:
217 return walk_vm_hard(file, buf, size, offset, operation);
218}
219
220static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
221 loff_t *offset)
222{
223 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
224 ssize_t count = 0;
225
226 if (*offset < 0)
227 return -EINVAL; 181 return -EINVAL;
228 if (!access_ok(VERIFY_WRITE, buf, size)) 182 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
183 continue;
184 if (seg == 0)
229 return -EFAULT; 185 return -EFAULT;
186 nr_segs = seg;
187 count -= iv->iov_len; /* This segment is no good */
188 break;
189 }
190
191 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
192 if (filp->f_flags & O_DIRECT) {
193 loff_t pos = *ppos, size;
194 struct address_space *mapping;
195 struct inode *inode;
196
197 mapping = filp->f_mapping;
198 inode = mapping->host;
199 retval = 0;
200 if (!count)
201 goto out; /* skip atime */
202
203 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
204 retval = gfs2_glock_nq_m_atime(1, &gh);
205 if (retval)
206 goto out;
230 207
231 if (!(file->f_flags & O_LARGEFILE)) { 208 size = i_size_read(inode);
232 if (*offset >= MAX_NON_LFS) 209 if (pos < size) {
233 return -EFBIG; 210 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
234 if (*offset + size > MAX_NON_LFS) 211 if (retval > 0 && !is_sync_kiocb(iocb))
235 size = MAX_NON_LFS - *offset; 212 retval = -EIOCBQUEUED;
236 } 213 if (retval > 0)
237 214 *ppos = pos + retval;
238 count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
239
240 if (count > 0)
241 *offset += count;
242
243 return count;
244}
245
246/**
247 * do_read_direct - Read bytes from a file
248 * @file: The file to read from
249 * @buf: The buffer to copy into
250 * @size: The amount of data requested
251 * @offset: The current file offset
252 * @num_gh: The number of other locks we need to do the read
253 * @ghs: the locks we need plus one for our lock
254 *
255 * Outputs: Offset - updated according to number of bytes read
256 *
257 * Returns: The number of bytes read, errno on failure
258 */
259
260static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
261 loff_t *offset, unsigned int num_gh,
262 struct gfs2_holder *ghs)
263{
264 struct inode *inode = file->f_mapping->host;
265 struct gfs2_inode *ip = get_v2ip(inode);
266 unsigned int state = LM_ST_DEFERRED;
267 int flags = 0;
268 unsigned int x;
269 ssize_t count = 0;
270 int error;
271
272 for (x = 0; x < num_gh; x++)
273 if (ghs[x].gh_gl == ip->i_gl) {
274 state = LM_ST_SHARED;
275 flags |= GL_LOCAL_EXCL;
276 break;
277 } 215 }
278 216 file_accessed(filp);
279 gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); 217 gfs2_glock_dq_m(1, &gh);
280 218 gfs2_holder_uninit(&gh);
281 error = gfs2_glock_nq_m(num_gh + 1, ghs);
282 if (error)
283 goto out; 219 goto out;
220 }
284 221
285 error = -EINVAL; 222 retval = 0;
286 if (gfs2_is_jdata(ip)) 223 if (count) {
287 goto out_gunlock; 224 for (seg = 0; seg < nr_segs; seg++) {
288 225 read_descriptor_t desc;
289 if (gfs2_is_stuffed(ip)) { 226
290 size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; 227 desc.written = 0;
291 228 desc.arg.buf = iov[seg].iov_base;
292 if (((*offset) & mask) || (((unsigned long)buf) & mask)) 229 desc.count = iov[seg].iov_len;
293 goto out_gunlock; 230 if (desc.count == 0)
294 231 continue;
295 count = do_jdata_read(file, buf, size & ~mask, offset); 232 desc.error = 0;
296 } else 233 do_generic_file_read(filp,ppos,&desc,file_read_actor);
297 count = generic_file_read(file, buf, size, offset); 234 retval += desc.written;
298 235 if (desc.error) {
299 error = 0; 236 retval = retval ?: desc.error;
300 237 break;
301 out_gunlock: 238 }
302 gfs2_glock_dq_m(num_gh + 1, ghs); 239 }
303 240 }
304 out: 241out:
305 gfs2_holder_uninit(&ghs[num_gh]); 242 return retval;
306
307 return (count) ? count : error;
308}
309
310/**
311 * do_read_buf - Read bytes from a file
312 * @file: The file to read from
313 * @buf: The buffer to copy into
314 * @size: The amount of data requested
315 * @offset: The current file offset
316 * @num_gh: The number of other locks we need to do the read
317 * @ghs: the locks we need plus one for our lock
318 *
319 * Outputs: Offset - updated according to number of bytes read
320 *
321 * Returns: The number of bytes read, errno on failure
322 */
323
324static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
325 loff_t *offset, unsigned int num_gh,
326 struct gfs2_holder *ghs)
327{
328 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
329 ssize_t count = 0;
330 int error;
331
332 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
333
334 error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
335 if (error)
336 goto out;
337
338 if (gfs2_is_jdata(ip))
339 count = do_jdata_read(file, buf, size, offset);
340 else
341 count = generic_file_read(file, buf, size, offset);
342
343 gfs2_glock_dq_m(num_gh + 1, ghs);
344
345 out:
346 gfs2_holder_uninit(&ghs[num_gh]);
347
348 return (count) ? count : error;
349} 243}
350 244
351/** 245/**
@@ -360,550 +254,49 @@ static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
360 * Returns: The number of bytes read, errno on failure 254 * Returns: The number of bytes read, errno on failure
361 */ 255 */
362 256
363static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size, 257static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
364 loff_t *offset) 258 loff_t *offset)
365{ 259{
366 atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
367
368 if (file->f_flags & O_DIRECT)
369 return walk_vm(file, buf, size, offset, do_read_direct);
370 else
371 return walk_vm(file, buf, size, offset, do_read_buf);
372}
373
374/**
375 * grope_mapping - feel up a mapping that needs to be written
376 * @buf: the start of the memory to be written
377 * @size: the size of the memory to be written
378 *
379 * We do this after acquiring the locks on the mapping,
380 * but before starting the write transaction. We need to make
381 * sure that we don't cause recursive transactions if blocks
382 * need to be allocated to the file backing the mapping.
383 *
384 * Returns: errno
385 */
386
387static int grope_mapping(const char __user *buf, size_t size)
388{
389 const char __user *stop = buf + size;
390 char c;
391
392 while (buf < stop) {
393 if (copy_from_user(&c, buf, 1))
394 return -EFAULT;
395 buf += PAGE_CACHE_SIZE;
396 buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
397 }
398
399 return 0;
400}
401
402/**
403 * do_write_direct_alloc - Write bytes to a file
404 * @file: The file to write to
405 * @buf: The buffer to copy from
406 * @size: The amount of data requested
407 * @offset: The current file offset
408 *
409 * Outputs: Offset - updated according to number of bytes written
410 *
411 * Returns: The number of bytes written, errno on failure
412 */
413
414static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
415 loff_t *offset)
416{
417 struct inode *inode = file->f_mapping->host;
418 struct gfs2_inode *ip = get_v2ip(inode);
419 struct gfs2_sbd *sdp = ip->i_sbd;
420 struct gfs2_alloc *al = NULL;
421 struct iovec local_iov = { .iov_base = buf, .iov_len = size }; 260 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
422 struct buffer_head *dibh; 261 struct kiocb kiocb;
423 unsigned int data_blocks, ind_blocks; 262 ssize_t ret;
424 ssize_t count;
425 int error;
426
427 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
428
429 al = gfs2_alloc_get(ip);
430
431 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
432 if (error)
433 goto fail;
434
435 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
436 if (error)
437 goto fail_gunlock_q;
438
439 al->al_requested = data_blocks + ind_blocks;
440
441 error = gfs2_inplace_reserve(ip);
442 if (error)
443 goto fail_gunlock_q;
444
445 error = gfs2_trans_begin(sdp,
446 al->al_rgd->rd_ri.ri_length + ind_blocks +
447 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
448 if (error)
449 goto fail_ipres;
450
451 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
452 error = gfs2_meta_inode_buffer(ip, &dibh);
453 if (error)
454 goto fail_end_trans;
455
456 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
457 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
458
459 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
460 gfs2_dinode_out(&ip->i_di, dibh->b_data);
461 brelse(dibh);
462 }
463
464 if (gfs2_is_stuffed(ip)) {
465 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
466 if (error)
467 goto fail_end_trans;
468 }
469
470 count = generic_file_write_nolock(file, &local_iov, 1, offset);
471 if (count < 0) {
472 error = count;
473 goto fail_end_trans;
474 }
475
476 error = gfs2_meta_inode_buffer(ip, &dibh);
477 if (error)
478 goto fail_end_trans;
479
480 if (ip->i_di.di_size < inode->i_size)
481 ip->i_di.di_size = inode->i_size;
482 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
483
484 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
485 gfs2_dinode_out(&ip->i_di, dibh->b_data);
486 brelse(dibh);
487
488 gfs2_trans_end(sdp);
489 263
490 if (file->f_flags & O_SYNC) 264 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
491 gfs2_log_flush_glock(ip->i_gl);
492
493 gfs2_inplace_release(ip);
494 gfs2_quota_unlock(ip);
495 gfs2_alloc_put(ip);
496
497 if (file->f_mapping->nrpages) {
498 error = filemap_fdatawrite(file->f_mapping);
499 if (!error)
500 error = filemap_fdatawait(file->f_mapping);
501 }
502 if (error)
503 return error;
504
505 return count;
506
507 fail_end_trans:
508 gfs2_trans_end(sdp);
509
510 fail_ipres:
511 gfs2_inplace_release(ip);
512
513 fail_gunlock_q:
514 gfs2_quota_unlock(ip);
515
516 fail:
517 gfs2_alloc_put(ip);
518 265
519 return error; 266 init_sync_kiocb(&kiocb, filp);
520} 267 ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
521 268 if (-EIOCBQUEUED == ret)
522/** 269 ret = wait_on_sync_kiocb(&kiocb);
523 * do_write_direct - Write bytes to a file 270 return ret;
524 * @file: The file to write to
525 * @buf: The buffer to copy from
526 * @size: The amount of data requested
527 * @offset: The current file offset
528 * @num_gh: The number of other locks we need to do the read
529 * @gh: the locks we need plus one for our lock
530 *
531 * Outputs: Offset - updated according to number of bytes written
532 *
533 * Returns: The number of bytes written, errno on failure
534 */
535
536static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
537 loff_t *offset, unsigned int num_gh,
538 struct gfs2_holder *ghs)
539{
540 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
541 struct gfs2_sbd *sdp = ip->i_sbd;
542 struct gfs2_file *fp = get_v2fp(file);
543 unsigned int state = LM_ST_DEFERRED;
544 int alloc_required;
545 unsigned int x;
546 size_t s;
547 ssize_t count = 0;
548 int error;
549
550 if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
551 state = LM_ST_EXCLUSIVE;
552 else
553 for (x = 0; x < num_gh; x++)
554 if (ghs[x].gh_gl == ip->i_gl) {
555 state = LM_ST_EXCLUSIVE;
556 break;
557 }
558
559 restart:
560 gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
561
562 error = gfs2_glock_nq_m(num_gh + 1, ghs);
563 if (error)
564 goto out;
565
566 error = -EINVAL;
567 if (gfs2_is_jdata(ip))
568 goto out_gunlock;
569
570 if (num_gh) {
571 error = grope_mapping(buf, size);
572 if (error)
573 goto out_gunlock;
574 }
575
576 if (file->f_flags & O_APPEND)
577 *offset = ip->i_di.di_size;
578
579 if (!(file->f_flags & O_LARGEFILE)) {
580 error = -EFBIG;
581 if (*offset >= MAX_NON_LFS)
582 goto out_gunlock;
583 if (*offset + size > MAX_NON_LFS)
584 size = MAX_NON_LFS - *offset;
585 }
586
587 if (gfs2_is_stuffed(ip) ||
588 *offset + size > ip->i_di.di_size ||
589 ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
590 alloc_required = 1;
591 else {
592 error = gfs2_write_alloc_required(ip, *offset, size,
593 &alloc_required);
594 if (error)
595 goto out_gunlock;
596 }
597
598 if (alloc_required && state != LM_ST_EXCLUSIVE) {
599 gfs2_glock_dq_m(num_gh + 1, ghs);
600 gfs2_holder_uninit(&ghs[num_gh]);
601 state = LM_ST_EXCLUSIVE;
602 goto restart;
603 }
604
605 if (alloc_required) {
606 set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
607
608 /* split large writes into smaller atomic transactions */
609 while (size) {
610 s = gfs2_tune_get(sdp, gt_max_atomic_write);
611 if (s > size)
612 s = size;
613
614 error = do_write_direct_alloc(file, buf, s, offset);
615 if (error < 0)
616 goto out_gunlock;
617
618 buf += error;
619 size -= error;
620 count += error;
621 }
622 } else {
623 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
624 struct gfs2_holder t_gh;
625
626 clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
627
628 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
629 GL_NEVER_RECURSE, &t_gh);
630 if (error)
631 goto out_gunlock;
632
633 count = generic_file_write_nolock(file, &local_iov, 1, offset);
634
635 gfs2_glock_dq_uninit(&t_gh);
636 }
637
638 error = 0;
639
640 out_gunlock:
641 gfs2_glock_dq_m(num_gh + 1, ghs);
642
643 out:
644 gfs2_holder_uninit(&ghs[num_gh]);
645
646 return (count) ? count : error;
647} 271}
648 272
649/** 273static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
650 * do_do_write_buf - Write bytes to a file 274 unsigned long nr_segs, loff_t *ppos)
651 * @file: The file to write to
652 * @buf: The buffer to copy from
653 * @size: The amount of data requested
654 * @offset: The current file offset
655 *
656 * Outputs: Offset - updated according to number of bytes written
657 *
658 * Returns: The number of bytes written, errno on failure
659 */
660
661static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
662 loff_t *offset)
663{ 275{
664 struct inode *inode = file->f_mapping->host; 276 struct kiocb kiocb;
665 struct gfs2_inode *ip = get_v2ip(inode); 277 ssize_t ret;
666 struct gfs2_sbd *sdp = ip->i_sbd;
667 struct gfs2_alloc *al = NULL;
668 struct buffer_head *dibh;
669 unsigned int data_blocks, ind_blocks;
670 int alloc_required, journaled;
671 ssize_t count;
672 int error;
673
674 journaled = gfs2_is_jdata(ip);
675
676 gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
677
678 error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
679 if (error)
680 return error;
681
682 if (alloc_required) {
683 al = gfs2_alloc_get(ip);
684
685 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
686 if (error)
687 goto fail;
688
689 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
690 if (error)
691 goto fail_gunlock_q;
692
693 al->al_requested = data_blocks + ind_blocks;
694
695 error = gfs2_inplace_reserve(ip);
696 if (error)
697 goto fail_gunlock_q;
698
699 error = gfs2_trans_begin(sdp,
700 al->al_rgd->rd_ri.ri_length +
701 ind_blocks +
702 ((journaled) ? data_blocks : 0) +
703 RES_DINODE + RES_STATFS + RES_QUOTA,
704 0);
705 if (error)
706 goto fail_ipres;
707 } else {
708 error = gfs2_trans_begin(sdp,
709 ((journaled) ? data_blocks : 0) +
710 RES_DINODE,
711 0);
712 if (error)
713 goto fail_ipres;
714 }
715
716 if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
717 error = gfs2_meta_inode_buffer(ip, &dibh);
718 if (error)
719 goto fail_end_trans;
720
721 ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
722 (~(S_ISUID | S_ISGID)) : (~S_ISUID);
723
724 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
725 gfs2_dinode_out(&ip->i_di, dibh->b_data);
726 brelse(dibh);
727 }
728 278
729 if (journaled) { 279 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
730 count = gfs2_jdata_write(ip, buf, *offset, size,
731 gfs2_copy_from_user);
732 if (count < 0) {
733 error = count;
734 goto fail_end_trans;
735 }
736
737 *offset += count;
738 } else {
739 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
740
741 count = generic_file_write_nolock(file, &local_iov, 1, offset);
742 if (count < 0) {
743 error = count;
744 goto fail_end_trans;
745 }
746
747 error = gfs2_meta_inode_buffer(ip, &dibh);
748 if (error)
749 goto fail_end_trans;
750
751 if (ip->i_di.di_size < inode->i_size)
752 ip->i_di.di_size = inode->i_size;
753 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
754
755 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
756 gfs2_dinode_out(&ip->i_di, dibh->b_data);
757 brelse(dibh);
758 }
759
760 gfs2_trans_end(sdp);
761
762 if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
763 gfs2_log_flush_glock(ip->i_gl);
764 error = filemap_fdatawrite(file->f_mapping);
765 if (error == 0)
766 error = filemap_fdatawait(file->f_mapping);
767 if (error)
768 goto fail_ipres;
769 }
770
771 if (alloc_required) {
772 gfs2_assert_warn(sdp, count != size ||
773 al->al_alloced);
774 gfs2_inplace_release(ip);
775 gfs2_quota_unlock(ip);
776 gfs2_alloc_put(ip);
777 }
778
779 return count;
780
781 fail_end_trans:
782 gfs2_trans_end(sdp);
783
784 fail_ipres:
785 if (alloc_required)
786 gfs2_inplace_release(ip);
787
788 fail_gunlock_q:
789 if (alloc_required)
790 gfs2_quota_unlock(ip);
791 280
792 fail: 281 init_sync_kiocb(&kiocb, filp);
793 if (alloc_required) 282 ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
794 gfs2_alloc_put(ip); 283 if (-EIOCBQUEUED == ret)
795 284 ret = wait_on_sync_kiocb(&kiocb);
796 return error; 285 return ret;
797} 286}
798 287
799/** 288static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
800 * do_write_buf - Write bytes to a file 289 size_t count, loff_t pos)
801 * @file: The file to write to
802 * @buf: The buffer to copy from
803 * @size: The amount of data requested
804 * @offset: The current file offset
805 * @num_gh: The number of other locks we need to do the read
806 * @gh: the locks we need plus one for our lock
807 *
808 * Outputs: Offset - updated according to number of bytes written
809 *
810 * Returns: The number of bytes written, errno on failure
811 */
812
813static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
814 loff_t *offset, unsigned int num_gh,
815 struct gfs2_holder *ghs)
816{ 290{
817 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); 291 struct file *filp = iocb->ki_filp;
818 struct gfs2_sbd *sdp = ip->i_sbd; 292 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
819 size_t s;
820 ssize_t count = 0;
821 int error;
822
823 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
824
825 error = gfs2_glock_nq_m(num_gh + 1, ghs);
826 if (error)
827 goto out;
828
829 if (num_gh) {
830 error = grope_mapping(buf, size);
831 if (error)
832 goto out_gunlock;
833 }
834
835 if (file->f_flags & O_APPEND)
836 *offset = ip->i_di.di_size;
837
838 if (!(file->f_flags & O_LARGEFILE)) {
839 error = -EFBIG;
840 if (*offset >= MAX_NON_LFS)
841 goto out_gunlock;
842 if (*offset + size > MAX_NON_LFS)
843 size = MAX_NON_LFS - *offset;
844 }
845
846 /* split large writes into smaller atomic transactions */
847 while (size) {
848 s = gfs2_tune_get(sdp, gt_max_atomic_write);
849 if (s > size)
850 s = size;
851
852 error = do_do_write_buf(file, buf, s, offset);
853 if (error < 0)
854 goto out_gunlock;
855
856 buf += error;
857 size -= error;
858 count += error;
859 }
860
861 error = 0;
862 293
863 out_gunlock: 294 atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
864 gfs2_glock_dq_m(num_gh + 1, ghs);
865 295
866 out: 296 BUG_ON(iocb->ki_pos != pos);
867 gfs2_holder_uninit(&ghs[num_gh]); 297 return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
868
869 return (count) ? count : error;
870} 298}
871 299
872/**
873 * gfs2_write - Write bytes to a file
874 * @file: The file to write to
875 * @buf: The buffer to copy from
876 * @size: The amount of data requested
877 * @offset: The current file offset
878 *
879 * Outputs: Offset - updated according to number of bytes written
880 *
881 * Returns: The number of bytes written, errno on failure
882 */
883
884static ssize_t gfs2_write(struct file *file, const char __user *buf,
885 size_t size, loff_t *offset)
886{
887 struct inode *inode = file->f_mapping->host;
888 ssize_t count;
889
890 atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
891
892 if (*offset < 0)
893 return -EINVAL;
894 if (!access_ok(VERIFY_READ, buf, size))
895 return -EFAULT;
896
897 mutex_lock(&inode->i_mutex);
898 if (file->f_flags & O_DIRECT)
899 count = walk_vm(file, buf, size, offset,
900 do_write_direct);
901 else
902 count = walk_vm(file, buf, size, offset, do_write_buf);
903 mutex_unlock(&inode->i_mutex);
904
905 return count;
906}
907 300
908/** 301/**
909 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read() 302 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
@@ -1158,9 +551,6 @@ static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd, unsigned lo
1158 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) { 551 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
1159 if (!S_ISREG(ip->i_di.di_mode)) 552 if (!S_ISREG(ip->i_di.di_mode))
1160 goto out; 553 goto out;
1161 /* FIXME: Would be nice not to require the following test */
1162 if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
1163 goto out;
1164 } 554 }
1165 if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) { 555 if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
1166 if (!S_ISDIR(ip->i_di.di_mode)) 556 if (!S_ISDIR(ip->i_di.di_mode))
@@ -1246,21 +636,14 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
1246 return error; 636 return error;
1247 } 637 }
1248 638
1249 if (gfs2_is_jdata(ip)) { 639 /* This is VM_MAYWRITE instead of VM_WRITE because a call
1250 if (vma->vm_flags & VM_MAYSHARE) 640 to mprotect() can turn on VM_WRITE later. */
1251 error = -EOPNOTSUPP; 641
1252 else 642 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
1253 vma->vm_ops = &gfs2_vm_ops_private; 643 (VM_MAYSHARE | VM_MAYWRITE))
1254 } else { 644 vma->vm_ops = &gfs2_vm_ops_sharewrite;
1255 /* This is VM_MAYWRITE instead of VM_WRITE because a call 645 else
1256 to mprotect() can turn on VM_WRITE later. */ 646 vma->vm_ops = &gfs2_vm_ops_private;
1257
1258 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
1259 (VM_MAYSHARE | VM_MAYWRITE))
1260 vma->vm_ops = &gfs2_vm_ops_sharewrite;
1261 else
1262 vma->vm_ops = &gfs2_vm_ops_private;
1263 }
1264 647
1265 gfs2_glock_dq_uninit(&i_gh); 648 gfs2_glock_dq_uninit(&i_gh);
1266 649
@@ -1313,13 +696,6 @@ static int gfs2_open(struct inode *inode, struct file *file)
1313 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) 696 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
1314 file->f_flags |= O_DIRECT; 697 file->f_flags |= O_DIRECT;
1315 698
1316 /* Don't let the user open O_DIRECT on a jdata file */
1317
1318 if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
1319 error = -EINVAL;
1320 goto fail_gunlock;
1321 }
1322
1323 gfs2_glock_dq_uninit(&i_gh); 699 gfs2_glock_dq_uninit(&i_gh);
1324 } 700 }
1325 701
@@ -1446,29 +822,10 @@ static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
1446 read_actor_t actor, void *target) 822 read_actor_t actor, void *target)
1447{ 823{
1448 struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host); 824 struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
1449 struct gfs2_holder gh;
1450 ssize_t retval;
1451 825
1452 atomic_inc(&ip->i_sbd->sd_ops_file); 826 atomic_inc(&ip->i_sbd->sd_ops_file);
1453 827
1454 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 828 return generic_file_sendfile(in_file, offset, count, actor, target);
1455
1456 retval = gfs2_glock_nq_atime(&gh);
1457 if (retval)
1458 goto out;
1459
1460 if (gfs2_is_jdata(ip))
1461 retval = -EOPNOTSUPP;
1462 else
1463 retval = generic_file_sendfile(in_file, offset, count, actor,
1464 target);
1465
1466 gfs2_glock_dq(&gh);
1467
1468 out:
1469 gfs2_holder_uninit(&gh);
1470
1471 return retval;
1472} 829}
1473 830
1474static int do_flock(struct file *file, int cmd, struct file_lock *fl) 831static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -1567,7 +924,11 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1567struct file_operations gfs2_file_fops = { 924struct file_operations gfs2_file_fops = {
1568 .llseek = gfs2_llseek, 925 .llseek = gfs2_llseek,
1569 .read = gfs2_read, 926 .read = gfs2_read,
1570 .write = gfs2_write, 927 .readv = gfs2_file_readv,
928 .aio_read = gfs2_file_aio_read,
929 .write = generic_file_write,
930 .writev = generic_file_writev,
931 .aio_write = generic_file_aio_write,
1571 .ioctl = gfs2_ioctl, 932 .ioctl = gfs2_ioctl,
1572 .mmap = gfs2_mmap, 933 .mmap = gfs2_mmap,
1573 .open = gfs2_open, 934 .open = gfs2_open,