aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_vnodeops.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2013-08-12 06:49:45 -0400
committerBen Myers <bpm@sgi.com>2013-08-12 17:53:39 -0400
commitc24b5dfadc4a4f7a13af373067871479c74455e6 (patch)
treeb723f9218cf1d6d7e73ea4241869ad1905798c36 /fs/xfs/xfs_vnodeops.c
parent836a94ad59bf6c1bcea0fdbe945540926fa3ca8b (diff)
xfs: kill xfs_vnodeops.[ch]
Now we have xfs_inode.c for holding kernel-only XFS inode operations, move all the inode operations from xfs_vnodeops.c to this new file as it holds another set of kernel-only inode operations. The name of this file traces back to the days of Irix and its vnodes, which we don't have anymore. Essentially this move consolidates the inode locking functions and a bunch of XFS inode operations into the one file. Eventually the high level functions will be merged into the VFS interface functions in xfs_iops.c. This leaves only internal preallocation, EOF block manipulation and hole punching functions in vnodeops.c. Move these to xfs_bmap_util.c where we are already consolidating various in-kernel physical extent manipulation and querying functions. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r--fs/xfs/xfs_vnodeops.c1872
1 files changed, 0 insertions, 1872 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
deleted file mode 100644
index dc81a8d86ba5..000000000000
--- a/fs/xfs/xfs_vnodeops.c
+++ /dev/null
@@ -1,1872 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * Copyright (c) 2012 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_bit.h"
24#include "xfs_log.h"
25#include "xfs_trans.h"
26#include "xfs_sb.h"
27#include "xfs_ag.h"
28#include "xfs_mount.h"
29#include "xfs_da_btree.h"
30#include "xfs_dir2_format.h"
31#include "xfs_dir2.h"
32#include "xfs_bmap_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_inode_item.h"
37#include "xfs_itable.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h"
40#include "xfs_bmap.h"
41#include "xfs_bmap_util.h"
42#include "xfs_acl.h"
43#include "xfs_attr.h"
44#include "xfs_error.h"
45#include "xfs_quota.h"
46#include "xfs_utils.h"
47#include "xfs_rtalloc.h"
48#include "xfs_trans_space.h"
49#include "xfs_log_priv.h"
50#include "xfs_filestream.h"
51#include "xfs_vnodeops.h"
52#include "xfs_trace.h"
53#include "xfs_icache.h"
54#include "xfs_symlink.h"
55
56
57/*
58 * This is called by xfs_inactive to free any blocks beyond eof
59 * when the link count isn't zero and by xfs_dm_punch_hole() when
60 * punching a hole to EOF.
61 */
62int
63xfs_free_eofblocks(
64 xfs_mount_t *mp,
65 xfs_inode_t *ip,
66 bool need_iolock)
67{
68 xfs_trans_t *tp;
69 int error;
70 xfs_fileoff_t end_fsb;
71 xfs_fileoff_t last_fsb;
72 xfs_filblks_t map_len;
73 int nimaps;
74 xfs_bmbt_irec_t imap;
75
76 /*
77 * Figure out if there are any blocks beyond the end
78 * of the file. If not, then there is nothing to do.
79 */
80 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
81 last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
82 if (last_fsb <= end_fsb)
83 return 0;
84 map_len = last_fsb - end_fsb;
85
86 nimaps = 1;
87 xfs_ilock(ip, XFS_ILOCK_SHARED);
88 error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
89 xfs_iunlock(ip, XFS_ILOCK_SHARED);
90
91 if (!error && (nimaps != 0) &&
92 (imap.br_startblock != HOLESTARTBLOCK ||
93 ip->i_delayed_blks)) {
94 /*
95 * Attach the dquots to the inode up front.
96 */
97 error = xfs_qm_dqattach(ip, 0);
98 if (error)
99 return error;
100
101 /*
102 * There are blocks after the end of file.
103 * Free them up now by truncating the file to
104 * its current size.
105 */
106 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
107
108 if (need_iolock) {
109 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
110 xfs_trans_cancel(tp, 0);
111 return EAGAIN;
112 }
113 }
114
115 error = xfs_trans_reserve(tp, 0,
116 XFS_ITRUNCATE_LOG_RES(mp),
117 0, XFS_TRANS_PERM_LOG_RES,
118 XFS_ITRUNCATE_LOG_COUNT);
119 if (error) {
120 ASSERT(XFS_FORCED_SHUTDOWN(mp));
121 xfs_trans_cancel(tp, 0);
122 if (need_iolock)
123 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
124 return error;
125 }
126
127 xfs_ilock(ip, XFS_ILOCK_EXCL);
128 xfs_trans_ijoin(tp, ip, 0);
129
130 /*
131 * Do not update the on-disk file size. If we update the
132 * on-disk file size and then the system crashes before the
133 * contents of the file are flushed to disk then the files
134 * may be full of holes (ie NULL files bug).
135 */
136 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
137 XFS_ISIZE(ip));
138 if (error) {
139 /*
140 * If we get an error at this point we simply don't
141 * bother truncating the file.
142 */
143 xfs_trans_cancel(tp,
144 (XFS_TRANS_RELEASE_LOG_RES |
145 XFS_TRANS_ABORT));
146 } else {
147 error = xfs_trans_commit(tp,
148 XFS_TRANS_RELEASE_LOG_RES);
149 if (!error)
150 xfs_inode_clear_eofblocks_tag(ip);
151 }
152
153 xfs_iunlock(ip, XFS_ILOCK_EXCL);
154 if (need_iolock)
155 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
156 }
157 return error;
158}
159
160int
161xfs_release(
162 xfs_inode_t *ip)
163{
164 xfs_mount_t *mp = ip->i_mount;
165 int error;
166
167 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
168 return 0;
169
170 /* If this is a read-only mount, don't do this (would generate I/O) */
171 if (mp->m_flags & XFS_MOUNT_RDONLY)
172 return 0;
173
174 if (!XFS_FORCED_SHUTDOWN(mp)) {
175 int truncated;
176
177 /*
178 * If we are using filestreams, and we have an unlinked
179 * file that we are processing the last close on, then nothing
180 * will be able to reopen and write to this file. Purge this
181 * inode from the filestreams cache so that it doesn't delay
182 * teardown of the inode.
183 */
184 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
185 xfs_filestream_deassociate(ip);
186
187 /*
188 * If we previously truncated this file and removed old data
189 * in the process, we want to initiate "early" writeout on
190 * the last close. This is an attempt to combat the notorious
191 * NULL files problem which is particularly noticeable from a
192 * truncate down, buffered (re-)write (delalloc), followed by
193 * a crash. What we are effectively doing here is
194 * significantly reducing the time window where we'd otherwise
195 * be exposed to that problem.
196 */
197 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
198 if (truncated) {
199 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
200 if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
201 error = -filemap_flush(VFS_I(ip)->i_mapping);
202 if (error)
203 return error;
204 }
205 }
206 }
207
208 if (ip->i_d.di_nlink == 0)
209 return 0;
210
211 if (xfs_can_free_eofblocks(ip, false)) {
212
213 /*
214 * If we can't get the iolock just skip truncating the blocks
215 * past EOF because we could deadlock with the mmap_sem
216 * otherwise. We'll get another chance to drop them once the
217 * last reference to the inode is dropped, so we'll never leak
218 * blocks permanently.
219 *
220 * Further, check if the inode is being opened, written and
221 * closed frequently and we have delayed allocation blocks
222 * outstanding (e.g. streaming writes from the NFS server),
223 * truncating the blocks past EOF will cause fragmentation to
224 * occur.
225 *
226 * In this case don't do the truncation, either, but we have to
227 * be careful how we detect this case. Blocks beyond EOF show
228 * up as i_delayed_blks even when the inode is clean, so we
229 * need to truncate them away first before checking for a dirty
230 * release. Hence on the first dirty close we will still remove
231 * the speculative allocation, but after that we will leave it
232 * in place.
233 */
234 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
235 return 0;
236
237 error = xfs_free_eofblocks(mp, ip, true);
238 if (error && error != EAGAIN)
239 return error;
240
241 /* delalloc blocks after truncation means it really is dirty */
242 if (ip->i_delayed_blks)
243 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
244 }
245 return 0;
246}
247
248/*
249 * xfs_inactive
250 *
251 * This is called when the vnode reference count for the vnode
252 * goes to zero. If the file has been unlinked, then it must
253 * now be truncated. Also, we clear all of the read-ahead state
254 * kept for the inode here since the file is now closed.
255 */
256int
257xfs_inactive(
258 xfs_inode_t *ip)
259{
260 xfs_bmap_free_t free_list;
261 xfs_fsblock_t first_block;
262 int committed;
263 xfs_trans_t *tp;
264 xfs_mount_t *mp;
265 int error;
266 int truncate = 0;
267
268 /*
269 * If the inode is already free, then there can be nothing
270 * to clean up here.
271 */
272 if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
273 ASSERT(ip->i_df.if_real_bytes == 0);
274 ASSERT(ip->i_df.if_broot_bytes == 0);
275 return VN_INACTIVE_CACHE;
276 }
277
278 mp = ip->i_mount;
279
280 error = 0;
281
282 /* If this is a read-only mount, don't do this (would generate I/O) */
283 if (mp->m_flags & XFS_MOUNT_RDONLY)
284 goto out;
285
286 if (ip->i_d.di_nlink != 0) {
287 /*
288 * force is true because we are evicting an inode from the
289 * cache. Post-eof blocks must be freed, lest we end up with
290 * broken free space accounting.
291 */
292 if (xfs_can_free_eofblocks(ip, true)) {
293 error = xfs_free_eofblocks(mp, ip, false);
294 if (error)
295 return VN_INACTIVE_CACHE;
296 }
297 goto out;
298 }
299
300 if (S_ISREG(ip->i_d.di_mode) &&
301 (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
302 ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
303 truncate = 1;
304
305 error = xfs_qm_dqattach(ip, 0);
306 if (error)
307 return VN_INACTIVE_CACHE;
308
309 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
310 error = xfs_trans_reserve(tp, 0,
311 (truncate || S_ISLNK(ip->i_d.di_mode)) ?
312 XFS_ITRUNCATE_LOG_RES(mp) :
313 XFS_IFREE_LOG_RES(mp),
314 0,
315 XFS_TRANS_PERM_LOG_RES,
316 XFS_ITRUNCATE_LOG_COUNT);
317 if (error) {
318 ASSERT(XFS_FORCED_SHUTDOWN(mp));
319 xfs_trans_cancel(tp, 0);
320 return VN_INACTIVE_CACHE;
321 }
322
323 xfs_ilock(ip, XFS_ILOCK_EXCL);
324 xfs_trans_ijoin(tp, ip, 0);
325
326 if (S_ISLNK(ip->i_d.di_mode)) {
327 error = xfs_inactive_symlink(ip, &tp);
328 if (error)
329 goto out_cancel;
330 } else if (truncate) {
331 ip->i_d.di_size = 0;
332 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
333
334 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
335 if (error)
336 goto out_cancel;
337
338 ASSERT(ip->i_d.di_nextents == 0);
339 }
340
341 /*
342 * If there are attributes associated with the file then blow them away
343 * now. The code calls a routine that recursively deconstructs the
344 * attribute fork. We need to just commit the current transaction
345 * because we can't use it for xfs_attr_inactive().
346 */
347 if (ip->i_d.di_anextents > 0) {
348 ASSERT(ip->i_d.di_forkoff != 0);
349
350 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
351 if (error)
352 goto out_unlock;
353
354 xfs_iunlock(ip, XFS_ILOCK_EXCL);
355
356 error = xfs_attr_inactive(ip);
357 if (error)
358 goto out;
359
360 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
361 error = xfs_trans_reserve(tp, 0,
362 XFS_IFREE_LOG_RES(mp),
363 0, XFS_TRANS_PERM_LOG_RES,
364 XFS_INACTIVE_LOG_COUNT);
365 if (error) {
366 xfs_trans_cancel(tp, 0);
367 goto out;
368 }
369
370 xfs_ilock(ip, XFS_ILOCK_EXCL);
371 xfs_trans_ijoin(tp, ip, 0);
372 }
373
374 if (ip->i_afp)
375 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
376
377 ASSERT(ip->i_d.di_anextents == 0);
378
379 /*
380 * Free the inode.
381 */
382 xfs_bmap_init(&free_list, &first_block);
383 error = xfs_ifree(tp, ip, &free_list);
384 if (error) {
385 /*
386 * If we fail to free the inode, shut down. The cancel
387 * might do that, we need to make sure. Otherwise the
388 * inode might be lost for a long time or forever.
389 */
390 if (!XFS_FORCED_SHUTDOWN(mp)) {
391 xfs_notice(mp, "%s: xfs_ifree returned error %d",
392 __func__, error);
393 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
394 }
395 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
396 } else {
397 /*
398 * Credit the quota account(s). The inode is gone.
399 */
400 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
401
402 /*
403 * Just ignore errors at this point. There is nothing we can
404 * do except to try to keep going. Make sure it's not a silent
405 * error.
406 */
407 error = xfs_bmap_finish(&tp, &free_list, &committed);
408 if (error)
409 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
410 __func__, error);
411 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
412 if (error)
413 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
414 __func__, error);
415 }
416
417 /*
418 * Release the dquots held by inode, if any.
419 */
420 xfs_qm_dqdetach(ip);
421out_unlock:
422 xfs_iunlock(ip, XFS_ILOCK_EXCL);
423out:
424 return VN_INACTIVE_CACHE;
425out_cancel:
426 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
427 goto out_unlock;
428}
429
430/*
431 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
432 * is allowed, otherwise it has to be an exact match. If a CI match is found,
433 * ci_name->name will point to a the actual name (caller must free) or
434 * will be set to NULL if an exact match is found.
435 */
436int
437xfs_lookup(
438 xfs_inode_t *dp,
439 struct xfs_name *name,
440 xfs_inode_t **ipp,
441 struct xfs_name *ci_name)
442{
443 xfs_ino_t inum;
444 int error;
445 uint lock_mode;
446
447 trace_xfs_lookup(dp, name);
448
449 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
450 return XFS_ERROR(EIO);
451
452 lock_mode = xfs_ilock_map_shared(dp);
453 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
454 xfs_iunlock_map_shared(dp, lock_mode);
455
456 if (error)
457 goto out;
458
459 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
460 if (error)
461 goto out_free_name;
462
463 return 0;
464
465out_free_name:
466 if (ci_name)
467 kmem_free(ci_name->name);
468out:
469 *ipp = NULL;
470 return error;
471}
472
473int
474xfs_create(
475 xfs_inode_t *dp,
476 struct xfs_name *name,
477 umode_t mode,
478 xfs_dev_t rdev,
479 xfs_inode_t **ipp)
480{
481 int is_dir = S_ISDIR(mode);
482 struct xfs_mount *mp = dp->i_mount;
483 struct xfs_inode *ip = NULL;
484 struct xfs_trans *tp = NULL;
485 int error;
486 xfs_bmap_free_t free_list;
487 xfs_fsblock_t first_block;
488 bool unlock_dp_on_error = false;
489 uint cancel_flags;
490 int committed;
491 prid_t prid;
492 struct xfs_dquot *udqp = NULL;
493 struct xfs_dquot *gdqp = NULL;
494 struct xfs_dquot *pdqp = NULL;
495 uint resblks;
496 uint log_res;
497 uint log_count;
498
499 trace_xfs_create(dp, name);
500
501 if (XFS_FORCED_SHUTDOWN(mp))
502 return XFS_ERROR(EIO);
503
504 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
505 prid = xfs_get_projid(dp);
506 else
507 prid = XFS_PROJID_DEFAULT;
508
509 /*
510 * Make sure that we have allocated dquot(s) on disk.
511 */
512 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
513 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
514 &udqp, &gdqp, &pdqp);
515 if (error)
516 return error;
517
518 if (is_dir) {
519 rdev = 0;
520 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
521 log_res = XFS_MKDIR_LOG_RES(mp);
522 log_count = XFS_MKDIR_LOG_COUNT;
523 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
524 } else {
525 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
526 log_res = XFS_CREATE_LOG_RES(mp);
527 log_count = XFS_CREATE_LOG_COUNT;
528 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
529 }
530
531 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
532
533 /*
534 * Initially assume that the file does not exist and
535 * reserve the resources for that case. If that is not
536 * the case we'll drop the one we have and get a more
537 * appropriate transaction later.
538 */
539 error = xfs_trans_reserve(tp, resblks, log_res, 0,
540 XFS_TRANS_PERM_LOG_RES, log_count);
541 if (error == ENOSPC) {
542 /* flush outstanding delalloc blocks and retry */
543 xfs_flush_inodes(mp);
544 error = xfs_trans_reserve(tp, resblks, log_res, 0,
545 XFS_TRANS_PERM_LOG_RES, log_count);
546 }
547 if (error == ENOSPC) {
548 /* No space at all so try a "no-allocation" reservation */
549 resblks = 0;
550 error = xfs_trans_reserve(tp, 0, log_res, 0,
551 XFS_TRANS_PERM_LOG_RES, log_count);
552 }
553 if (error) {
554 cancel_flags = 0;
555 goto out_trans_cancel;
556 }
557
558 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
559 unlock_dp_on_error = true;
560
561 xfs_bmap_init(&free_list, &first_block);
562
563 /*
564 * Reserve disk quota and the inode.
565 */
566 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
567 pdqp, resblks, 1, 0);
568 if (error)
569 goto out_trans_cancel;
570
571 error = xfs_dir_canenter(tp, dp, name, resblks);
572 if (error)
573 goto out_trans_cancel;
574
575 /*
576 * A newly created regular or special file just has one directory
577 * entry pointing to them, but a directory also the "." entry
578 * pointing to itself.
579 */
580 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
581 prid, resblks > 0, &ip, &committed);
582 if (error) {
583 if (error == ENOSPC)
584 goto out_trans_cancel;
585 goto out_trans_abort;
586 }
587
588 /*
589 * Now we join the directory inode to the transaction. We do not do it
590 * earlier because xfs_dir_ialloc might commit the previous transaction
591 * (and release all the locks). An error from here on will result in
592 * the transaction cancel unlocking dp so don't do it explicitly in the
593 * error path.
594 */
595 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
596 unlock_dp_on_error = false;
597
598 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
599 &first_block, &free_list, resblks ?
600 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
601 if (error) {
602 ASSERT(error != ENOSPC);
603 goto out_trans_abort;
604 }
605 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
606 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
607
608 if (is_dir) {
609 error = xfs_dir_init(tp, ip, dp);
610 if (error)
611 goto out_bmap_cancel;
612
613 error = xfs_bumplink(tp, dp);
614 if (error)
615 goto out_bmap_cancel;
616 }
617
618 /*
619 * If this is a synchronous mount, make sure that the
620 * create transaction goes to disk before returning to
621 * the user.
622 */
623 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
624 xfs_trans_set_sync(tp);
625
626 /*
627 * Attach the dquot(s) to the inodes and modify them incore.
628 * These ids of the inode couldn't have changed since the new
629 * inode has been locked ever since it was created.
630 */
631 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
632
633 error = xfs_bmap_finish(&tp, &free_list, &committed);
634 if (error)
635 goto out_bmap_cancel;
636
637 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
638 if (error)
639 goto out_release_inode;
640
641 xfs_qm_dqrele(udqp);
642 xfs_qm_dqrele(gdqp);
643 xfs_qm_dqrele(pdqp);
644
645 *ipp = ip;
646 return 0;
647
648 out_bmap_cancel:
649 xfs_bmap_cancel(&free_list);
650 out_trans_abort:
651 cancel_flags |= XFS_TRANS_ABORT;
652 out_trans_cancel:
653 xfs_trans_cancel(tp, cancel_flags);
654 out_release_inode:
655 /*
656 * Wait until after the current transaction is aborted to
657 * release the inode. This prevents recursive transactions
658 * and deadlocks from xfs_inactive.
659 */
660 if (ip)
661 IRELE(ip);
662
663 xfs_qm_dqrele(udqp);
664 xfs_qm_dqrele(gdqp);
665 xfs_qm_dqrele(pdqp);
666
667 if (unlock_dp_on_error)
668 xfs_iunlock(dp, XFS_ILOCK_EXCL);
669 return error;
670}
671
#ifdef DEBUG
/*
 * Debug-only counters maintained by xfs_lock_inodes().
 */
int xfs_locked_n;		/* calls that needed no retry at all */
int xfs_small_retries;		/* calls that retried fewer than 5 times */
int xfs_middle_retries;		/* calls that retried 5..99 times */
int xfs_lots_retries;		/* calls that retried 100 times or more */
int xfs_lock_delays;		/* delay(1) back-offs taken while retrying */
#endif
679
680/*
681 * Bump the subclass so xfs_lock_inodes() acquires each lock with
682 * a different value
683 */
684static inline int
685xfs_lock_inumorder(int lock_mode, int subclass)
686{
687 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
688 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
689 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
690 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
691
692 return lock_mode;
693}
694
695/*
696 * The following routine will lock n inodes in exclusive mode.
697 * We assume the caller calls us with the inodes in i_ino order.
698 *
699 * We need to detect deadlock where an inode that we lock
700 * is in the AIL and we start waiting for another inode that is locked
701 * by a thread in a long running transaction (such as truncate). This can
702 * result in deadlock since the long running trans might need to wait
703 * for the inode we just locked in order to push the tail and free space
704 * in the log.
705 */
706void
707xfs_lock_inodes(
708 xfs_inode_t **ips,
709 int inodes,
710 uint lock_mode)
711{
712 int attempts = 0, i, j, try_lock;
713 xfs_log_item_t *lp;
714
715 ASSERT(ips && (inodes >= 2)); /* we need at least two */
716
717 try_lock = 0;
718 i = 0;
719
720again:
721 for (; i < inodes; i++) {
722 ASSERT(ips[i]);
723
724 if (i && (ips[i] == ips[i-1])) /* Already locked */
725 continue;
726
727 /*
728 * If try_lock is not set yet, make sure all locked inodes
729 * are not in the AIL.
730 * If any are, set try_lock to be used later.
731 */
732
733 if (!try_lock) {
734 for (j = (i - 1); j >= 0 && !try_lock; j--) {
735 lp = (xfs_log_item_t *)ips[j]->i_itemp;
736 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
737 try_lock++;
738 }
739 }
740 }
741
742 /*
743 * If any of the previous locks we have locked is in the AIL,
744 * we must TRY to get the second and subsequent locks. If
745 * we can't get any, we must release all we have
746 * and try again.
747 */
748
749 if (try_lock) {
750 /* try_lock must be 0 if i is 0. */
751 /*
752 * try_lock means we have an inode locked
753 * that is in the AIL.
754 */
755 ASSERT(i != 0);
756 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
757 attempts++;
758
759 /*
760 * Unlock all previous guys and try again.
761 * xfs_iunlock will try to push the tail
762 * if the inode is in the AIL.
763 */
764
765 for(j = i - 1; j >= 0; j--) {
766
767 /*
768 * Check to see if we've already
769 * unlocked this one.
770 * Not the first one going back,
771 * and the inode ptr is the same.
772 */
773 if ((j != (i - 1)) && ips[j] ==
774 ips[j+1])
775 continue;
776
777 xfs_iunlock(ips[j], lock_mode);
778 }
779
780 if ((attempts % 5) == 0) {
781 delay(1); /* Don't just spin the CPU */
782#ifdef DEBUG
783 xfs_lock_delays++;
784#endif
785 }
786 i = 0;
787 try_lock = 0;
788 goto again;
789 }
790 } else {
791 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
792 }
793 }
794
795#ifdef DEBUG
796 if (attempts) {
797 if (attempts < 5) xfs_small_retries++;
798 else if (attempts < 100) xfs_middle_retries++;
799 else xfs_lots_retries++;
800 } else {
801 xfs_locked_n++;
802 }
803#endif
804}
805
806/*
807 * xfs_lock_two_inodes() can only be used to lock one type of lock
808 * at a time - the iolock or the ilock, but not both at once. If
809 * we lock both at once, lockdep will report false positives saying
810 * we have violated locking orders.
811 */
812void
813xfs_lock_two_inodes(
814 xfs_inode_t *ip0,
815 xfs_inode_t *ip1,
816 uint lock_mode)
817{
818 xfs_inode_t *temp;
819 int attempts = 0;
820 xfs_log_item_t *lp;
821
822 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
823 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
824 ASSERT(ip0->i_ino != ip1->i_ino);
825
826 if (ip0->i_ino > ip1->i_ino) {
827 temp = ip0;
828 ip0 = ip1;
829 ip1 = temp;
830 }
831
832 again:
833 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
834
835 /*
836 * If the first lock we have locked is in the AIL, we must TRY to get
837 * the second lock. If we can't get it, we must release the first one
838 * and try again.
839 */
840 lp = (xfs_log_item_t *)ip0->i_itemp;
841 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
842 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
843 xfs_iunlock(ip0, lock_mode);
844 if ((++attempts % 5) == 0)
845 delay(1); /* Don't just spin the CPU */
846 goto again;
847 }
848 } else {
849 xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
850 }
851}
852
853int
854xfs_remove(
855 xfs_inode_t *dp,
856 struct xfs_name *name,
857 xfs_inode_t *ip)
858{
859 xfs_mount_t *mp = dp->i_mount;
860 xfs_trans_t *tp = NULL;
861 int is_dir = S_ISDIR(ip->i_d.di_mode);
862 int error = 0;
863 xfs_bmap_free_t free_list;
864 xfs_fsblock_t first_block;
865 int cancel_flags;
866 int committed;
867 int link_zero;
868 uint resblks;
869 uint log_count;
870
871 trace_xfs_remove(dp, name);
872
873 if (XFS_FORCED_SHUTDOWN(mp))
874 return XFS_ERROR(EIO);
875
876 error = xfs_qm_dqattach(dp, 0);
877 if (error)
878 goto std_return;
879
880 error = xfs_qm_dqattach(ip, 0);
881 if (error)
882 goto std_return;
883
884 if (is_dir) {
885 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
886 log_count = XFS_DEFAULT_LOG_COUNT;
887 } else {
888 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
889 log_count = XFS_REMOVE_LOG_COUNT;
890 }
891 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
892
893 /*
894 * We try to get the real space reservation first,
895 * allowing for directory btree deletion(s) implying
896 * possible bmap insert(s). If we can't get the space
897 * reservation then we use 0 instead, and avoid the bmap
898 * btree insert(s) in the directory code by, if the bmap
899 * insert tries to happen, instead trimming the LAST
900 * block from the directory.
901 */
902 resblks = XFS_REMOVE_SPACE_RES(mp);
903 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
904 XFS_TRANS_PERM_LOG_RES, log_count);
905 if (error == ENOSPC) {
906 resblks = 0;
907 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
908 XFS_TRANS_PERM_LOG_RES, log_count);
909 }
910 if (error) {
911 ASSERT(error != ENOSPC);
912 cancel_flags = 0;
913 goto out_trans_cancel;
914 }
915
916 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
917
918 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
919 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
920
921 /*
922 * If we're removing a directory perform some additional validation.
923 */
924 if (is_dir) {
925 ASSERT(ip->i_d.di_nlink >= 2);
926 if (ip->i_d.di_nlink != 2) {
927 error = XFS_ERROR(ENOTEMPTY);
928 goto out_trans_cancel;
929 }
930 if (!xfs_dir_isempty(ip)) {
931 error = XFS_ERROR(ENOTEMPTY);
932 goto out_trans_cancel;
933 }
934 }
935
936 xfs_bmap_init(&free_list, &first_block);
937 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
938 &first_block, &free_list, resblks);
939 if (error) {
940 ASSERT(error != ENOENT);
941 goto out_bmap_cancel;
942 }
943 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
944
945 if (is_dir) {
946 /*
947 * Drop the link from ip's "..".
948 */
949 error = xfs_droplink(tp, dp);
950 if (error)
951 goto out_bmap_cancel;
952
953 /*
954 * Drop the "." link from ip to self.
955 */
956 error = xfs_droplink(tp, ip);
957 if (error)
958 goto out_bmap_cancel;
959 } else {
960 /*
961 * When removing a non-directory we need to log the parent
962 * inode here. For a directory this is done implicitly
963 * by the xfs_droplink call for the ".." entry.
964 */
965 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
966 }
967
968 /*
969 * Drop the link from dp to ip.
970 */
971 error = xfs_droplink(tp, ip);
972 if (error)
973 goto out_bmap_cancel;
974
975 /*
976 * Determine if this is the last link while
977 * we are in the transaction.
978 */
979 link_zero = (ip->i_d.di_nlink == 0);
980
981 /*
982 * If this is a synchronous mount, make sure that the
983 * remove transaction goes to disk before returning to
984 * the user.
985 */
986 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
987 xfs_trans_set_sync(tp);
988
989 error = xfs_bmap_finish(&tp, &free_list, &committed);
990 if (error)
991 goto out_bmap_cancel;
992
993 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
994 if (error)
995 goto std_return;
996
997 /*
998 * If we are using filestreams, kill the stream association.
999 * If the file is still open it may get a new one but that
1000 * will get killed on last close in xfs_close() so we don't
1001 * have to worry about that.
1002 */
1003 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1004 xfs_filestream_deassociate(ip);
1005
1006 return 0;
1007
1008 out_bmap_cancel:
1009 xfs_bmap_cancel(&free_list);
1010 cancel_flags |= XFS_TRANS_ABORT;
1011 out_trans_cancel:
1012 xfs_trans_cancel(tp, cancel_flags);
1013 std_return:
1014 return error;
1015}
1016
1017int
1018xfs_link(
1019 xfs_inode_t *tdp,
1020 xfs_inode_t *sip,
1021 struct xfs_name *target_name)
1022{
1023 xfs_mount_t *mp = tdp->i_mount;
1024 xfs_trans_t *tp;
1025 int error;
1026 xfs_bmap_free_t free_list;
1027 xfs_fsblock_t first_block;
1028 int cancel_flags;
1029 int committed;
1030 int resblks;
1031
1032 trace_xfs_link(tdp, target_name);
1033
1034 ASSERT(!S_ISDIR(sip->i_d.di_mode));
1035
1036 if (XFS_FORCED_SHUTDOWN(mp))
1037 return XFS_ERROR(EIO);
1038
1039 error = xfs_qm_dqattach(sip, 0);
1040 if (error)
1041 goto std_return;
1042
1043 error = xfs_qm_dqattach(tdp, 0);
1044 if (error)
1045 goto std_return;
1046
1047 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
1048 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1049 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1050 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
1051 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1052 if (error == ENOSPC) {
1053 resblks = 0;
1054 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
1055 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
1056 }
1057 if (error) {
1058 cancel_flags = 0;
1059 goto error_return;
1060 }
1061
1062 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1063
1064 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1065 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1066
1067 /*
1068 * If we are using project inheritance, we only allow hard link
1069 * creation in our tree when the project IDs are the same; else
1070 * the tree quota mechanism could be circumvented.
1071 */
1072 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1073 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1074 error = XFS_ERROR(EXDEV);
1075 goto error_return;
1076 }
1077
1078 error = xfs_dir_canenter(tp, tdp, target_name, resblks);
1079 if (error)
1080 goto error_return;
1081
1082 xfs_bmap_init(&free_list, &first_block);
1083
1084 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1085 &first_block, &free_list, resblks);
1086 if (error)
1087 goto abort_return;
1088 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1089 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1090
1091 error = xfs_bumplink(tp, sip);
1092 if (error)
1093 goto abort_return;
1094
1095 /*
1096 * If this is a synchronous mount, make sure that the
1097 * link transaction goes to disk before returning to
1098 * the user.
1099 */
1100 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1101 xfs_trans_set_sync(tp);
1102 }
1103
1104 error = xfs_bmap_finish (&tp, &free_list, &committed);
1105 if (error) {
1106 xfs_bmap_cancel(&free_list);
1107 goto abort_return;
1108 }
1109
1110 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1111
1112 abort_return:
1113 cancel_flags |= XFS_TRANS_ABORT;
1114 error_return:
1115 xfs_trans_cancel(tp, cancel_flags);
1116 std_return:
1117 return error;
1118}
1119
1120int
1121xfs_set_dmattrs(
1122 xfs_inode_t *ip,
1123 u_int evmask,
1124 u_int16_t state)
1125{
1126 xfs_mount_t *mp = ip->i_mount;
1127 xfs_trans_t *tp;
1128 int error;
1129
1130 if (!capable(CAP_SYS_ADMIN))
1131 return XFS_ERROR(EPERM);
1132
1133 if (XFS_FORCED_SHUTDOWN(mp))
1134 return XFS_ERROR(EIO);
1135
1136 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
1137 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
1138 if (error) {
1139 xfs_trans_cancel(tp, 0);
1140 return error;
1141 }
1142 xfs_ilock(ip, XFS_ILOCK_EXCL);
1143 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1144
1145 ip->i_d.di_dmevmask = evmask;
1146 ip->i_d.di_dmstate = state;
1147
1148 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1149 error = xfs_trans_commit(tp, 0);
1150
1151 return error;
1152}
1153
1154/*
1155 * xfs_alloc_file_space()
1156 * This routine allocates disk space for the given file.
1157 *
1158 * If alloc_type == 0, this request is for an ALLOCSP type
1159 * request which will change the file size. In this case, no
1160 * DMAPI event will be generated by the call. A TRUNCATE event
1161 * will be generated later by xfs_setattr.
1162 *
1163 * If alloc_type != 0, this request is for a RESVSP type
1164 * request, and a DMAPI DM_EVENT_WRITE will be generated if the
1165 * lower block boundary byte address is less than the file's
1166 * length.
1167 *
1168 * RETURNS:
1169 * 0 on success
1170 * errno on error
1171 *
1172 */
1173STATIC int
1174xfs_alloc_file_space(
1175 xfs_inode_t *ip,
1176 xfs_off_t offset,
1177 xfs_off_t len,
1178 int alloc_type,
1179 int attr_flags)
1180{
1181 xfs_mount_t *mp = ip->i_mount;
1182 xfs_off_t count;
1183 xfs_filblks_t allocated_fsb;
1184 xfs_filblks_t allocatesize_fsb;
1185 xfs_extlen_t extsz, temp;
1186 xfs_fileoff_t startoffset_fsb;
1187 xfs_fsblock_t firstfsb;
1188 int nimaps;
1189 int quota_flag;
1190 int rt;
1191 xfs_trans_t *tp;
1192 xfs_bmbt_irec_t imaps[1], *imapp;
1193 xfs_bmap_free_t free_list;
1194 uint qblocks, resblks, resrtextents;
1195 int committed;
1196 int error;
1197
1198 trace_xfs_alloc_file_space(ip);
1199
1200 if (XFS_FORCED_SHUTDOWN(mp))
1201 return XFS_ERROR(EIO);
1202
1203 error = xfs_qm_dqattach(ip, 0);
1204 if (error)
1205 return error;
1206
1207 if (len <= 0)
1208 return XFS_ERROR(EINVAL);
1209
1210 rt = XFS_IS_REALTIME_INODE(ip);
1211 extsz = xfs_get_extsz_hint(ip);
1212
1213 count = len;
1214 imapp = &imaps[0];
1215 nimaps = 1;
1216 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
1217 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
1218
1219 /*
1220 * Allocate file space until done or until there is an error
1221 */
1222 while (allocatesize_fsb && !error) {
1223 xfs_fileoff_t s, e;
1224
1225 /*
1226 * Determine space reservations for data/realtime.
1227 */
1228 if (unlikely(extsz)) {
1229 s = startoffset_fsb;
1230 do_div(s, extsz);
1231 s *= extsz;
1232 e = startoffset_fsb + allocatesize_fsb;
1233 if ((temp = do_mod(startoffset_fsb, extsz)))
1234 e += temp;
1235 if ((temp = do_mod(e, extsz)))
1236 e += extsz - temp;
1237 } else {
1238 s = 0;
1239 e = allocatesize_fsb;
1240 }
1241
1242 /*
1243 * The transaction reservation is limited to a 32-bit block
1244 * count, hence we need to limit the number of blocks we are
1245 * trying to reserve to avoid an overflow. We can't allocate
1246 * more than @nimaps extents, and an extent is limited on disk
1247 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
1248 */
1249 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
1250 if (unlikely(rt)) {
1251 resrtextents = qblocks = resblks;
1252 resrtextents /= mp->m_sb.sb_rextsize;
1253 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1254 quota_flag = XFS_QMOPT_RES_RTBLKS;
1255 } else {
1256 resrtextents = 0;
1257 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
1258 quota_flag = XFS_QMOPT_RES_REGBLKS;
1259 }
1260
1261 /*
1262 * Allocate and setup the transaction.
1263 */
1264 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1265 error = xfs_trans_reserve(tp, resblks,
1266 XFS_WRITE_LOG_RES(mp), resrtextents,
1267 XFS_TRANS_PERM_LOG_RES,
1268 XFS_WRITE_LOG_COUNT);
1269 /*
1270 * Check for running out of space
1271 */
1272 if (error) {
1273 /*
1274 * Free the transaction structure.
1275 */
1276 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1277 xfs_trans_cancel(tp, 0);
1278 break;
1279 }
1280 xfs_ilock(ip, XFS_ILOCK_EXCL);
1281 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
1282 0, quota_flag);
1283 if (error)
1284 goto error1;
1285
1286 xfs_trans_ijoin(tp, ip, 0);
1287
1288 xfs_bmap_init(&free_list, &firstfsb);
1289 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1290 allocatesize_fsb, alloc_type, &firstfsb,
1291 0, imapp, &nimaps, &free_list);
1292 if (error) {
1293 goto error0;
1294 }
1295
1296 /*
1297 * Complete the transaction
1298 */
1299 error = xfs_bmap_finish(&tp, &free_list, &committed);
1300 if (error) {
1301 goto error0;
1302 }
1303
1304 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1305 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1306 if (error) {
1307 break;
1308 }
1309
1310 allocated_fsb = imapp->br_blockcount;
1311
1312 if (nimaps == 0) {
1313 error = XFS_ERROR(ENOSPC);
1314 break;
1315 }
1316
1317 startoffset_fsb += allocated_fsb;
1318 allocatesize_fsb -= allocated_fsb;
1319 }
1320
1321 return error;
1322
1323error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1324 xfs_bmap_cancel(&free_list);
1325 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
1326
1327error1: /* Just cancel transaction */
1328 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1329 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1330 return error;
1331}
1332
1333/*
1334 * Zero file bytes between startoff and endoff inclusive.
1335 * The iolock is held exclusive and no blocks are buffered.
1336 *
1337 * This function is used by xfs_free_file_space() to zero
1338 * partial blocks when the range to free is not block aligned.
1339 * When unreserving space with boundaries that are not block
1340 * aligned we round up the start and round down the end
1341 * boundaries and then use this function to zero the parts of
1342 * the blocks that got dropped during the rounding.
1343 */
1344STATIC int
1345xfs_zero_remaining_bytes(
1346 xfs_inode_t *ip,
1347 xfs_off_t startoff,
1348 xfs_off_t endoff)
1349{
1350 xfs_bmbt_irec_t imap;
1351 xfs_fileoff_t offset_fsb;
1352 xfs_off_t lastoffset;
1353 xfs_off_t offset;
1354 xfs_buf_t *bp;
1355 xfs_mount_t *mp = ip->i_mount;
1356 int nimap;
1357 int error = 0;
1358
1359 /*
1360 * Avoid doing I/O beyond eof - it's not necessary
1361 * since nothing can read beyond eof. The space will
1362 * be zeroed when the file is extended anyway.
1363 */
1364 if (startoff >= XFS_ISIZE(ip))
1365 return 0;
1366
1367 if (endoff > XFS_ISIZE(ip))
1368 endoff = XFS_ISIZE(ip);
1369
1370 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
1371 mp->m_rtdev_targp : mp->m_ddev_targp,
1372 BTOBB(mp->m_sb.sb_blocksize), 0);
1373 if (!bp)
1374 return XFS_ERROR(ENOMEM);
1375
1376 xfs_buf_unlock(bp);
1377
1378 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
1379 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1380 nimap = 1;
1381 error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
1382 if (error || nimap < 1)
1383 break;
1384 ASSERT(imap.br_blockcount >= 1);
1385 ASSERT(imap.br_startoff == offset_fsb);
1386 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
1387 if (lastoffset > endoff)
1388 lastoffset = endoff;
1389 if (imap.br_startblock == HOLESTARTBLOCK)
1390 continue;
1391 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1392 if (imap.br_state == XFS_EXT_UNWRITTEN)
1393 continue;
1394 XFS_BUF_UNDONE(bp);
1395 XFS_BUF_UNWRITE(bp);
1396 XFS_BUF_READ(bp);
1397 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
1398 xfsbdstrat(mp, bp);
1399 error = xfs_buf_iowait(bp);
1400 if (error) {
1401 xfs_buf_ioerror_alert(bp,
1402 "xfs_zero_remaining_bytes(read)");
1403 break;
1404 }
1405 memset(bp->b_addr +
1406 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
1407 0, lastoffset - offset + 1);
1408 XFS_BUF_UNDONE(bp);
1409 XFS_BUF_UNREAD(bp);
1410 XFS_BUF_WRITE(bp);
1411 xfsbdstrat(mp, bp);
1412 error = xfs_buf_iowait(bp);
1413 if (error) {
1414 xfs_buf_ioerror_alert(bp,
1415 "xfs_zero_remaining_bytes(write)");
1416 break;
1417 }
1418 }
1419 xfs_buf_free(bp);
1420 return error;
1421}
1422
1423/*
1424 * xfs_free_file_space()
1425 * This routine frees disk space for the given file.
1426 *
1427 * This routine is only called by xfs_change_file_space
1428 * for an UNRESVSP type call.
1429 *
1430 * RETURNS:
1431 * 0 on success
1432 * errno on error
1433 *
1434 */
1435STATIC int
1436xfs_free_file_space(
1437 xfs_inode_t *ip,
1438 xfs_off_t offset,
1439 xfs_off_t len,
1440 int attr_flags)
1441{
1442 int committed;
1443 int done;
1444 xfs_fileoff_t endoffset_fsb;
1445 int error;
1446 xfs_fsblock_t firstfsb;
1447 xfs_bmap_free_t free_list;
1448 xfs_bmbt_irec_t imap;
1449 xfs_off_t ioffset;
1450 xfs_extlen_t mod=0;
1451 xfs_mount_t *mp;
1452 int nimap;
1453 uint resblks;
1454 xfs_off_t rounding;
1455 int rt;
1456 xfs_fileoff_t startoffset_fsb;
1457 xfs_trans_t *tp;
1458 int need_iolock = 1;
1459
1460 mp = ip->i_mount;
1461
1462 trace_xfs_free_file_space(ip);
1463
1464 error = xfs_qm_dqattach(ip, 0);
1465 if (error)
1466 return error;
1467
1468 error = 0;
1469 if (len <= 0) /* if nothing being freed */
1470 return error;
1471 rt = XFS_IS_REALTIME_INODE(ip);
1472 startoffset_fsb = XFS_B_TO_FSB(mp, offset);
1473 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
1474
1475 if (attr_flags & XFS_ATTR_NOLOCK)
1476 need_iolock = 0;
1477 if (need_iolock) {
1478 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1479 /* wait for the completion of any pending DIOs */
1480 inode_dio_wait(VFS_I(ip));
1481 }
1482
1483 rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1484 ioffset = offset & ~(rounding - 1);
1485 error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1486 ioffset, -1);
1487 if (error)
1488 goto out_unlock_iolock;
1489 truncate_pagecache_range(VFS_I(ip), ioffset, -1);
1490
1491 /*
1492 * Need to zero the stuff we're not freeing, on disk.
1493 * If it's a realtime file & can't use unwritten extents then we
1494 * actually need to zero the extent edges. Otherwise xfs_bunmapi
1495 * will take care of it for us.
1496 */
1497 if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
1498 nimap = 1;
1499 error = xfs_bmapi_read(ip, startoffset_fsb, 1,
1500 &imap, &nimap, 0);
1501 if (error)
1502 goto out_unlock_iolock;
1503 ASSERT(nimap == 0 || nimap == 1);
1504 if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1505 xfs_daddr_t block;
1506
1507 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1508 block = imap.br_startblock;
1509 mod = do_div(block, mp->m_sb.sb_rextsize);
1510 if (mod)
1511 startoffset_fsb += mp->m_sb.sb_rextsize - mod;
1512 }
1513 nimap = 1;
1514 error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
1515 &imap, &nimap, 0);
1516 if (error)
1517 goto out_unlock_iolock;
1518 ASSERT(nimap == 0 || nimap == 1);
1519 if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1520 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1521 mod++;
1522 if (mod && (mod != mp->m_sb.sb_rextsize))
1523 endoffset_fsb -= mod;
1524 }
1525 }
1526 if ((done = (endoffset_fsb <= startoffset_fsb)))
1527 /*
1528 * One contiguous piece to clear
1529 */
1530 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
1531 else {
1532 /*
1533 * Some full blocks, possibly two pieces to clear
1534 */
1535 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
1536 error = xfs_zero_remaining_bytes(ip, offset,
1537 XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
1538 if (!error &&
1539 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
1540 error = xfs_zero_remaining_bytes(ip,
1541 XFS_FSB_TO_B(mp, endoffset_fsb),
1542 offset + len - 1);
1543 }
1544
1545 /*
1546 * free file space until done or until there is an error
1547 */
1548 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1549 while (!error && !done) {
1550
1551 /*
1552 * allocate and setup the transaction. Allow this
1553 * transaction to dip into the reserve blocks to ensure
1554 * the freeing of the space succeeds at ENOSPC.
1555 */
1556 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1557 tp->t_flags |= XFS_TRANS_RESERVE;
1558 error = xfs_trans_reserve(tp,
1559 resblks,
1560 XFS_WRITE_LOG_RES(mp),
1561 0,
1562 XFS_TRANS_PERM_LOG_RES,
1563 XFS_WRITE_LOG_COUNT);
1564
1565 /*
1566 * check for running out of space
1567 */
1568 if (error) {
1569 /*
1570 * Free the transaction structure.
1571 */
1572 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1573 xfs_trans_cancel(tp, 0);
1574 break;
1575 }
1576 xfs_ilock(ip, XFS_ILOCK_EXCL);
1577 error = xfs_trans_reserve_quota(tp, mp,
1578 ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
1579 resblks, 0, XFS_QMOPT_RES_REGBLKS);
1580 if (error)
1581 goto error1;
1582
1583 xfs_trans_ijoin(tp, ip, 0);
1584
1585 /*
1586 * issue the bunmapi() call to free the blocks
1587 */
1588 xfs_bmap_init(&free_list, &firstfsb);
1589 error = xfs_bunmapi(tp, ip, startoffset_fsb,
1590 endoffset_fsb - startoffset_fsb,
1591 0, 2, &firstfsb, &free_list, &done);
1592 if (error) {
1593 goto error0;
1594 }
1595
1596 /*
1597 * complete the transaction
1598 */
1599 error = xfs_bmap_finish(&tp, &free_list, &committed);
1600 if (error) {
1601 goto error0;
1602 }
1603
1604 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1605 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1606 }
1607
1608 out_unlock_iolock:
1609 if (need_iolock)
1610 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1611 return error;
1612
1613 error0:
1614 xfs_bmap_cancel(&free_list);
1615 error1:
1616 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1617 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
1618 XFS_ILOCK_EXCL);
1619 return error;
1620}
1621
1622
1623STATIC int
1624xfs_zero_file_space(
1625 struct xfs_inode *ip,
1626 xfs_off_t offset,
1627 xfs_off_t len,
1628 int attr_flags)
1629{
1630 struct xfs_mount *mp = ip->i_mount;
1631 uint granularity;
1632 xfs_off_t start_boundary;
1633 xfs_off_t end_boundary;
1634 int error;
1635
1636 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1637
1638 /*
1639 * Round the range of extents we are going to convert inwards. If the
1640 * offset is aligned, then it doesn't get changed so we zero from the
1641 * start of the block offset points to.
1642 */
1643 start_boundary = round_up(offset, granularity);
1644 end_boundary = round_down(offset + len, granularity);
1645
1646 ASSERT(start_boundary >= offset);
1647 ASSERT(end_boundary <= offset + len);
1648
1649 if (!(attr_flags & XFS_ATTR_NOLOCK))
1650 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1651
1652 if (start_boundary < end_boundary - 1) {
1653 /* punch out the page cache over the conversion range */
1654 truncate_pagecache_range(VFS_I(ip), start_boundary,
1655 end_boundary - 1);
1656 /* convert the blocks */
1657 error = xfs_alloc_file_space(ip, start_boundary,
1658 end_boundary - start_boundary - 1,
1659 XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
1660 attr_flags);
1661 if (error)
1662 goto out_unlock;
1663
1664 /* We've handled the interior of the range, now for the edges */
1665 if (start_boundary != offset)
1666 error = xfs_iozero(ip, offset, start_boundary - offset);
1667 if (error)
1668 goto out_unlock;
1669
1670 if (end_boundary != offset + len)
1671 error = xfs_iozero(ip, end_boundary,
1672 offset + len - end_boundary);
1673
1674 } else {
1675 /*
1676 * It's either a sub-granularity range or the range spanned lies
1677 * partially across two adjacent blocks.
1678 */
1679 error = xfs_iozero(ip, offset, len);
1680 }
1681
1682out_unlock:
1683 if (!(attr_flags & XFS_ATTR_NOLOCK))
1684 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1685 return error;
1686
1687}
1688
1689/*
1690 * xfs_change_file_space()
1691 * This routine allocates or frees disk space for the given file.
1692 * The user specified parameters are checked for alignment and size
1693 * limitations.
1694 *
1695 * RETURNS:
1696 * 0 on success
1697 * errno on error
1698 *
1699 */
1700int
1701xfs_change_file_space(
1702 xfs_inode_t *ip,
1703 int cmd,
1704 xfs_flock64_t *bf,
1705 xfs_off_t offset,
1706 int attr_flags)
1707{
1708 xfs_mount_t *mp = ip->i_mount;
1709 int clrprealloc;
1710 int error;
1711 xfs_fsize_t fsize;
1712 int setprealloc;
1713 xfs_off_t startoffset;
1714 xfs_trans_t *tp;
1715 struct iattr iattr;
1716
1717 if (!S_ISREG(ip->i_d.di_mode))
1718 return XFS_ERROR(EINVAL);
1719
1720 switch (bf->l_whence) {
1721 case 0: /*SEEK_SET*/
1722 break;
1723 case 1: /*SEEK_CUR*/
1724 bf->l_start += offset;
1725 break;
1726 case 2: /*SEEK_END*/
1727 bf->l_start += XFS_ISIZE(ip);
1728 break;
1729 default:
1730 return XFS_ERROR(EINVAL);
1731 }
1732
1733 /*
1734 * length of <= 0 for resv/unresv/zero is invalid. length for
1735 * alloc/free is ignored completely and we have no idea what userspace
1736 * might have set it to, so set it to zero to allow range
1737 * checks to pass.
1738 */
1739 switch (cmd) {
1740 case XFS_IOC_ZERO_RANGE:
1741 case XFS_IOC_RESVSP:
1742 case XFS_IOC_RESVSP64:
1743 case XFS_IOC_UNRESVSP:
1744 case XFS_IOC_UNRESVSP64:
1745 if (bf->l_len <= 0)
1746 return XFS_ERROR(EINVAL);
1747 break;
1748 default:
1749 bf->l_len = 0;
1750 break;
1751 }
1752
1753 if (bf->l_start < 0 ||
1754 bf->l_start > mp->m_super->s_maxbytes ||
1755 bf->l_start + bf->l_len < 0 ||
1756 bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
1757 return XFS_ERROR(EINVAL);
1758
1759 bf->l_whence = 0;
1760
1761 startoffset = bf->l_start;
1762 fsize = XFS_ISIZE(ip);
1763
1764 setprealloc = clrprealloc = 0;
1765 switch (cmd) {
1766 case XFS_IOC_ZERO_RANGE:
1767 error = xfs_zero_file_space(ip, startoffset, bf->l_len,
1768 attr_flags);
1769 if (error)
1770 return error;
1771 setprealloc = 1;
1772 break;
1773
1774 case XFS_IOC_RESVSP:
1775 case XFS_IOC_RESVSP64:
1776 error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
1777 XFS_BMAPI_PREALLOC, attr_flags);
1778 if (error)
1779 return error;
1780 setprealloc = 1;
1781 break;
1782
1783 case XFS_IOC_UNRESVSP:
1784 case XFS_IOC_UNRESVSP64:
1785 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
1786 attr_flags)))
1787 return error;
1788 break;
1789
1790 case XFS_IOC_ALLOCSP:
1791 case XFS_IOC_ALLOCSP64:
1792 case XFS_IOC_FREESP:
1793 case XFS_IOC_FREESP64:
1794 /*
1795 * These operations actually do IO when extending the file, but
1796 * the allocation is done seperately to the zeroing that is
1797 * done. This set of operations need to be serialised against
1798 * other IO operations, such as truncate and buffered IO. We
1799 * need to take the IOLOCK here to serialise the allocation and
1800 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
1801 * truncate, direct IO) from racing against the transient
1802 * allocated but not written state we can have here.
1803 */
1804 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1805 if (startoffset > fsize) {
1806 error = xfs_alloc_file_space(ip, fsize,
1807 startoffset - fsize, 0,
1808 attr_flags | XFS_ATTR_NOLOCK);
1809 if (error) {
1810 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1811 break;
1812 }
1813 }
1814
1815 iattr.ia_valid = ATTR_SIZE;
1816 iattr.ia_size = startoffset;
1817
1818 error = xfs_setattr_size(ip, &iattr,
1819 attr_flags | XFS_ATTR_NOLOCK);
1820 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1821
1822 if (error)
1823 return error;
1824
1825 clrprealloc = 1;
1826 break;
1827
1828 default:
1829 ASSERT(0);
1830 return XFS_ERROR(EINVAL);
1831 }
1832
1833 /*
1834 * update the inode timestamp, mode, and prealloc flag bits
1835 */
1836 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
1837
1838 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
1839 0, 0, 0))) {
1840 /* ASSERT(0); */
1841 xfs_trans_cancel(tp, 0);
1842 return error;
1843 }
1844
1845 xfs_ilock(ip, XFS_ILOCK_EXCL);
1846 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1847
1848 if ((attr_flags & XFS_ATTR_DMI) == 0) {
1849 ip->i_d.di_mode &= ~S_ISUID;
1850
1851 /*
1852 * Note that we don't have to worry about mandatory
1853 * file locking being disabled here because we only
1854 * clear the S_ISGID bit if the Group execute bit is
1855 * on, but if it was on then mandatory locking wouldn't
1856 * have been enabled.
1857 */
1858 if (ip->i_d.di_mode & S_IXGRP)
1859 ip->i_d.di_mode &= ~S_ISGID;
1860
1861 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1862 }
1863 if (setprealloc)
1864 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
1865 else if (clrprealloc)
1866 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
1867
1868 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1869 if (attr_flags & XFS_ATTR_SYNC)
1870 xfs_trans_set_sync(tp);
1871 return xfs_trans_commit(tp, 0);
1872}