commit    9f5974c8734d83d4ab7096ed98136a82f41210d6
tree      6f328555796bafefb74936ab68128aa84efd28b1
parent    a2d823bf13efea4c859376f6e85c49cfbad7ab60
parent    ddae9c2ea79449beb00027cf77fca6dc489f2d15
author    Linus Torvalds <torvalds@g5.osdl.org>  2006-01-12 12:10:34 -0500
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-01-12 12:10:34 -0500

    Merge git://oss.sgi.com:8090/oss/git/xfs-2.6
Diffstat (limited to 'fs/xfs')

-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c    1088
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h      10
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c     1373
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h      696
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c       6
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c     10
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c     121
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.h       5
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h      6
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c       56
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.c      2
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.h     18
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c     19
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.c      1
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h     19
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c     4
-rw-r--r--  fs/xfs/quota/xfs_qm.c            18
-rw-r--r--  fs/xfs/support/debug.c           60
-rw-r--r--  fs/xfs/support/debug.h           25
-rw-r--r--  fs/xfs/support/uuid.c            23
-rw-r--r--  fs/xfs/xfs_arch.h                22
-rw-r--r--  fs/xfs/xfs_attr_leaf.c           12
-rw-r--r--  fs/xfs/xfs_attr_leaf.h           79
-rw-r--r--  fs/xfs/xfs_bmap.c               412
-rw-r--r--  fs/xfs/xfs_bmap.h                 7
-rw-r--r--  fs/xfs/xfs_clnt.h                 2
-rw-r--r--  fs/xfs/xfs_dfrag.c               16
-rw-r--r--  fs/xfs/xfs_dinode.h              22
-rw-r--r--  fs/xfs/xfs_dir.c                  2
-rw-r--r--  fs/xfs/xfs_dir.h                  2
-rw-r--r--  fs/xfs/xfs_dir2.h                 3
-rw-r--r--  fs/xfs/xfs_dir_leaf.h            64
-rw-r--r--  fs/xfs/xfs_error.c                1
-rw-r--r--  fs/xfs/xfs_error.h                8
-rw-r--r--  fs/xfs/xfs_fs.h                  10
-rw-r--r--  fs/xfs/xfs_fsops.c               26
-rw-r--r--  fs/xfs/xfs_fsops.h                1
-rw-r--r--  fs/xfs/xfs_iget.c                 5
-rw-r--r--  fs/xfs/xfs_inode.c               61
-rw-r--r--  fs/xfs/xfs_inode.h                4
-rw-r--r--  fs/xfs/xfs_inode_item.c           9
-rw-r--r--  fs/xfs/xfs_iomap.c              425
-rw-r--r--  fs/xfs/xfs_itable.c               5
-rw-r--r--  fs/xfs/xfs_log.c                123
-rw-r--r--  fs/xfs/xfs_log.h                 11
-rw-r--r--  fs/xfs/xfs_log_priv.h            77
-rw-r--r--  fs/xfs/xfs_log_recover.c         12
-rw-r--r--  fs/xfs/xfs_mount.c                5
-rw-r--r--  fs/xfs/xfs_mount.h                3
-rw-r--r--  fs/xfs/xfs_rename.c               7
-rw-r--r--  fs/xfs/xfs_rw.c                   9
-rw-r--r--  fs/xfs/xfs_sb.h                  17
-rw-r--r--  fs/xfs/xfs_trans.c               14
-rw-r--r--  fs/xfs/xfs_trans.h                1
-rw-r--r--  fs/xfs/xfs_utils.c                9
-rw-r--r--  fs/xfs/xfs_vfsops.c              50
-rw-r--r--  fs/xfs/xfs_vnodeops.c           193

57 files changed, 2717 insertions(+), 2572 deletions(-)
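The diff below is limited to fs/xfs. As a quick sketch (assuming a kernel git tree that contains this merge), the diffstat and the subtree-limited diff can be regenerated with standard git commands:

	# show the merge commit together with its diffstat
	git log -1 --stat 9f5974c8734d83d4ab7096ed98136a82f41210d6

	# regenerate the fs/xfs portion of the diff against the first parent
	git diff a2d823bf13efea4c859376f6e85c49cfbad7ab60 \
	         9f5974c8734d83d4ab7096ed98136a82f41210d6 -- fs/xfs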
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..d1db8c17a74e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include <linux/mpage.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 
 STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
-STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
-		struct writeback_control *wbc, void *, int, int);
 
 #if defined(XFS_RW_TRACE)
 void
@@ -55,17 +54,15 @@ xfs_page_trace(
 	int		mask)
 {
 	xfs_inode_t	*ip;
-	bhv_desc_t	*bdp;
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 	loff_t		isize = i_size_read(inode);
-	loff_t		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
 
 	if (page_has_buffers(page))
 		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
-	bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-	ip = XFS_BHVTOI(bdp);
+	ip = xfs_vtoi(vp);
 	if (!ip->i_rwtrace)
 		return;
 
@@ -103,15 +100,56 @@ xfs_finish_ioend(
 		queue_work(xfsdatad_workqueue, &ioend->io_work);
 }
 
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
 STATIC void
 xfs_destroy_ioend(
 	xfs_ioend_t		*ioend)
 {
+	struct buffer_head	*bh, *next;
+
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
+		bh->b_end_io(bh, ioend->io_uptodate);
+	}
+
 	vn_iowake(ioend->io_vnode);
 	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
+ * Buffered IO write completion for delayed allocate extents.
+ * TODO: Update ondisk isize now that we know the file data
+ * has been flushed (i.e. the notorious "NULL file" problem).
+ */
+STATIC void
+xfs_end_bio_delalloc(
+	void			*data)
+{
+	xfs_ioend_t		*ioend = data;
+
+	xfs_destroy_ioend(ioend);
+}
+
+/*
+ * Buffered IO write completion for regular, written extents.
+ */
+STATIC void
+xfs_end_bio_written(
+	void			*data)
+{
+	xfs_ioend_t		*ioend = data;
+
+	xfs_destroy_ioend(ioend);
+}
+
+/*
+ * IO write completion for unwritten extents.
+ *
  * Issue transactions to convert a buffer range from unwritten
  * to written extents.
  */
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
 	vnode_t			*vp = ioend->io_vnode;
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	struct buffer_head	*bh, *next;
 	int			error;
 
 	if (ioend->io_uptodate)
 		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-
-	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
-	for (bh = ioend->io_buffer_head; bh; bh = next) {
-		next = bh->b_private;
-
-		bh->b_end_io = NULL;
-		clear_buffer_unwritten(bh);
-		end_buffer_async_write(bh, ioend->io_uptodate);
-	}
-
 	xfs_destroy_ioend(ioend);
 }
 
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
  */
 STATIC xfs_ioend_t *
 xfs_alloc_ioend(
-	struct inode		*inode)
+	struct inode		*inode,
+	unsigned int		type)
 {
 	xfs_ioend_t		*ioend;
 
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
 	 */
 	atomic_set(&ioend->io_remaining, 1);
 	ioend->io_uptodate = 1; /* cleared if any I/O fails */
+	ioend->io_list = NULL;
+	ioend->io_type = type;
 	ioend->io_vnode = LINVFS_GET_VP(inode);
 	ioend->io_buffer_head = NULL;
+	ioend->io_buffer_tail = NULL;
 	atomic_inc(&ioend->io_vnode->v_iocount);
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
 
-	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+	if (type == IOMAP_UNWRITTEN)
+		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+	else if (type == IOMAP_DELAY)
+		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
+	else
+		INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
 
 	return ioend;
 }
 
-void
-linvfs_unwritten_done(
-	struct buffer_head	*bh,
-	int			uptodate)
-{
-	xfs_ioend_t		*ioend = bh->b_private;
-	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
-	unsigned long		flags;
-
-	ASSERT(buffer_unwritten(bh));
-	bh->b_end_io = NULL;
-
-	if (!uptodate)
-		ioend->io_uptodate = 0;
-
-	/*
-	 * Deep magic here.  We reuse b_private in the buffer_heads to build
-	 * a chain for completing the I/O from user context after we've issued
-	 * a transaction to convert the unwritten extent.
-	 */
-	spin_lock_irqsave(&unwritten_done_lock, flags);
-	bh->b_private = ioend->io_buffer_head;
-	ioend->io_buffer_head = bh;
-	spin_unlock_irqrestore(&unwritten_done_lock, flags);
-
-	xfs_finish_ioend(ioend);
-}
-
 STATIC int
 xfs_map_blocks(
 	struct inode		*inode,
@@ -218,138 +226,260 @@ xfs_map_blocks(
 	return -error;
 }
 
+STATIC inline int
+xfs_iomap_valid(
+	xfs_iomap_t		*iomapp,
+	loff_t			offset)
+{
+	return offset >= iomapp->iomap_offset &&
+		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+}
+
 /*
- * Finds the corresponding mapping in block @map array of the
- * given @offset within a @page.
+ * BIO completion handler for buffered IO.
  */
-STATIC xfs_iomap_t *
-xfs_offset_to_map(
+STATIC int
+xfs_end_bio(
+	struct bio		*bio,
+	unsigned int		bytes_done,
+	int			error)
+{
+	xfs_ioend_t		*ioend = bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
+
+	ASSERT(ioend);
+	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+
+	/* Toss bio and pass work off to an xfsdatad thread */
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		ioend->io_uptodate = 0;
+	bio->bi_private = NULL;
+	bio->bi_end_io = NULL;
+
+	bio_put(bio);
+	xfs_finish_ioend(ioend);
+	return 0;
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+	xfs_ioend_t	*ioend,
+	struct bio	*bio)
+{
+	atomic_inc(&ioend->io_remaining);
+
+	bio->bi_private = ioend;
+	bio->bi_end_io = xfs_end_bio;
+
+	submit_bio(WRITE, bio);
+	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
+	bio_put(bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+	struct buffer_head	*bh)
+{
+	struct bio		*bio;
+	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
+
+	do {
+		bio = bio_alloc(GFP_NOIO, nvecs);
+		nvecs >>= 1;
+	} while (!bio);
+
+	ASSERT(bio->bi_private == NULL);
+	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+	bio_get(bio);
+	return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+	struct buffer_head	*bh)
+{
+	ASSERT(buffer_mapped(bh));
+	ASSERT(buffer_locked(bh));
+	ASSERT(!buffer_delay(bh));
+	ASSERT(!buffer_unwritten(bh));
+
+	mark_buffer_async_write(bh);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
 	struct page		*page,
-	xfs_iomap_t		*iomapp,
-	unsigned long		offset)
+	struct writeback_control *wbc,
+	int			clear_dirty,
+	int			buffers)
+{
+	ASSERT(PageLocked(page));
+	ASSERT(!PageWriteback(page));
+	set_page_writeback(page);
+	if (clear_dirty)
+		clear_page_dirty(page);
+	unlock_page(page);
+	if (!buffers) {
+		end_page_writeback(page);
+		wbc->pages_skipped++;	/* We didn't write this page */
+	}
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up,
+ * covering the initial writepage page and also any probed pages.
+ */
+STATIC void
+xfs_submit_ioend(
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh;
+	struct bio		*bio;
+	sector_t		lastblock = 0;
+
+	do {
+		next = ioend->io_list;
+		bio = NULL;
+
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+			xfs_start_buffer_writeback(bh);
+
+			if (!bio) {
+ retry:
+				bio = xfs_alloc_ioend_bio(bh);
+			} else if (bh->b_blocknr != lastblock + 1) {
+				xfs_submit_ioend_bio(ioend, bio);
+				goto retry;
+			}
+
+			if (bio_add_buffer(bio, bh) != bh->b_size) {
+				xfs_submit_ioend_bio(ioend, bio);
+				goto retry;
+			}
+
+			lastblock = bh->b_blocknr;
+		}
+		if (bio)
+			xfs_submit_ioend_bio(ioend, bio);
+		xfs_finish_ioend(ioend);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh, *next_bh;
+
+	do {
+		next = ioend->io_list;
+		bh = ioend->io_buffer_head;
+		do {
+			next_bh = bh->b_private;
+			clear_buffer_async_write(bh);
+			unlock_buffer(bh);
+		} while ((bh = next_bh) != NULL);
+
+		vn_iowake(ioend->io_vnode);
+		mempool_free(ioend, xfs_ioend_pool);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	xfs_off_t		offset,
+	unsigned int		type,
+	xfs_ioend_t		**result,
+	int			need_ioend)
 {
-	loff_t			full_offset;	/* offset from start of file */
+	xfs_ioend_t		*ioend = *result;
 
-	ASSERT(offset < PAGE_CACHE_SIZE);
+	if (!ioend || need_ioend || type != ioend->io_type) {
+		xfs_ioend_t	*previous = *result;
 
-	full_offset = page->index;		/* NB: using 64bit number */
-	full_offset <<= PAGE_CACHE_SHIFT;	/* offset from file start */
-	full_offset += offset;			/* offset from page start */
+		ioend = xfs_alloc_ioend(inode, type);
+		ioend->io_offset = offset;
+		ioend->io_buffer_head = bh;
+		ioend->io_buffer_tail = bh;
+		if (previous)
+			previous->io_list = ioend;
+		*result = ioend;
+	} else {
+		ioend->io_buffer_tail->b_private = bh;
+		ioend->io_buffer_tail = bh;
+	}
 
-	if (full_offset < iomapp->iomap_offset)
-		return NULL;
-	if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
-		return iomapp;
-	return NULL;
+	bh->b_private = NULL;
+	ioend->io_size += bh->b_size;
 }
 
 STATIC void
 xfs_map_at_offset(
-	struct page		*page,
 	struct buffer_head	*bh,
-	unsigned long		offset,
+	loff_t			offset,
 	int			block_bits,
 	xfs_iomap_t		*iomapp)
 {
 	xfs_daddr_t		bn;
-	loff_t			delta;
 	int			sector_shift;
 
 	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
 	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
 	ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
 
-	delta = page->index;
-	delta <<= PAGE_CACHE_SHIFT;
-	delta += offset;
-	delta -= iomapp->iomap_offset;
-	delta >>= block_bits;
-
 	sector_shift = block_bits - BBSHIFT;
-	bn = iomapp->iomap_bn >> sector_shift;
-	bn += delta;
-	BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+	bn = (iomapp->iomap_bn >> sector_shift) +
+	      ((offset - iomapp->iomap_offset) >> block_bits);
+
+	ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
 	ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
 
 	lock_buffer(bh);
 	bh->b_blocknr = bn;
-	bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+	bh->b_bdev = iomapp->iomap_target->bt_bdev;
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
+	clear_buffer_unwritten(bh);
 }
 
 /*
- * Look for a page at index which is unlocked and contains our
- * unwritten extent flagged buffers at its head.  Returns page
- * locked and with an extra reference count, and length of the
- * unwritten extent component on this page that we can write,
- * in units of filesystem blocks.
- */
-STATIC struct page *
-xfs_probe_unwritten_page(
-	struct address_space	*mapping,
-	pgoff_t			index,
-	xfs_iomap_t		*iomapp,
-	xfs_ioend_t		*ioend,
-	unsigned long		max_offset,
-	unsigned long		*fsbs,
-	unsigned int		bbits)
-{
-	struct page		*page;
-
-	page = find_trylock_page(mapping, index);
-	if (!page)
-		return NULL;
-	if (PageWriteback(page))
-		goto out;
-
-	if (page->mapping && page_has_buffers(page)) {
-		struct buffer_head	*bh, *head;
-		unsigned long		p_offset = 0;
-
-		*fsbs = 0;
-		bh = head = page_buffers(page);
-		do {
-			if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
-				break;
-			if (!xfs_offset_to_map(page, iomapp, p_offset))
-				break;
-			if (p_offset >= max_offset)
-				break;
-			xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
-			set_buffer_unwritten_io(bh);
-			bh->b_private = ioend;
-			p_offset += bh->b_size;
-			(*fsbs)++;
-		} while ((bh = bh->b_this_page) != head);
-
-		if (p_offset)
-			return page;
-	}
-
-out:
-	unlock_page(page);
-	return NULL;
-}
-
-/*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
+ * Look for a page at index that is suitable for clustering.
  */
 STATIC unsigned int
-xfs_probe_unmapped_page(
-	struct address_space	*mapping,
-	pgoff_t			index,
-	unsigned int		pg_offset)
+xfs_probe_page(
+	struct page		*page,
+	unsigned int		pg_offset,
+	int			mapped)
 {
-	struct page		*page;
 	int			ret = 0;
 
-	page = find_trylock_page(mapping, index);
-	if (!page)
-		return 0;
 	if (PageWriteback(page))
-		goto out;
+		return 0;
 
 	if (page->mapping && PageDirty(page)) {
 		if (page_has_buffers(page)) {
@@ -357,79 +487,101 @@ xfs_probe_unmapped_page(
 
 			bh = head = page_buffers(page);
 			do {
-				if (buffer_mapped(bh) || !buffer_uptodate(bh))
+				if (!buffer_uptodate(bh))
+					break;
+				if (mapped != buffer_mapped(bh))
 					break;
 				ret += bh->b_size;
 				if (ret >= pg_offset)
 					break;
 			} while ((bh = bh->b_this_page) != head);
 		} else
-			ret = PAGE_CACHE_SIZE;
+			ret = mapped ? 0 : PAGE_CACHE_SIZE;
 	}
 
-out:
-	unlock_page(page);
 	return ret;
 }
 
-STATIC unsigned int
-xfs_probe_unmapped_cluster(
+STATIC size_t
+xfs_probe_cluster(
 	struct inode		*inode,
 	struct page		*startpage,
 	struct buffer_head	*bh,
-	struct buffer_head	*head)
+	struct buffer_head	*head,
+	int			mapped)
 {
+	struct pagevec		pvec;
 	pgoff_t			tindex, tlast, tloff;
-	unsigned int		pg_offset, len, total = 0;
-	struct address_space	*mapping = inode->i_mapping;
+	size_t			total = 0;
+	int			done = 0, i;
 
 	/* First sum forwards in this page */
 	do {
-		if (buffer_mapped(bh))
-			break;
+		if (mapped != buffer_mapped(bh))
+			return total;
 		total += bh->b_size;
 	} while ((bh = bh->b_this_page) != head);
 
-	/* If we reached the end of the page, sum forwards in
-	 * following pages.
-	 */
-	if (bh == head) {
-		tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-		/* Prune this back to avoid pathological behavior */
-		tloff = min(tlast, startpage->index + 64);
-		for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
-			len = xfs_probe_unmapped_page(mapping, tindex,
-							PAGE_CACHE_SIZE);
-			if (!len)
-				return total;
+	/* if we reached the end of the page, sum forwards in following pages */
+	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+	tindex = startpage->index + 1;
+
+	/* Prune this back to avoid pathological behavior */
+	tloff = min(tlast, startpage->index + 64);
+
+	pagevec_init(&pvec, 0);
+	while (!done && tindex <= tloff) {
+		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+			break;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			size_t pg_offset, len = 0;
+
+			if (tindex == tlast) {
+				pg_offset =
+				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+				if (!pg_offset) {
+					done = 1;
+					break;
+				}
+			} else
+				pg_offset = PAGE_CACHE_SIZE;
+
+			if (page->index == tindex && !TestSetPageLocked(page)) {
+				len = xfs_probe_page(page, pg_offset, mapped);
+				unlock_page(page);
+			}
+
+			if (!len) {
+				done = 1;
+				break;
+			}
+
 			total += len;
+			tindex++;
 		}
-		if (tindex == tlast &&
-		    (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-			total += xfs_probe_unmapped_page(mapping,
-							tindex, pg_offset);
-		}
+
+		pagevec_release(&pvec);
+		cond_resched();
 	}
+
 	return total;
 }
 
 /*
- * Probe for a given page (index) in the inode and test if it is delayed
- * and without unwritten buffers.  Returns page locked and with an extra
- * reference count.
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
  */
-STATIC struct page *
-xfs_probe_delalloc_page(
-	struct inode		*inode,
-	pgoff_t			index)
+STATIC int
+xfs_is_delayed_page(
+	struct page		*page,
+	unsigned int		type)
 {
-	struct page		*page;
-
-	page = find_trylock_page(inode->i_mapping, index);
-	if (!page)
-		return NULL;
 	if (PageWriteback(page))
-		goto out;
+		return 0;
 
 	if (page->mapping && page_has_buffers(page)) {
 		struct buffer_head	*bh, *head;
@@ -437,243 +589,156 @@ xfs_probe_delalloc_page(
 
 		bh = head = page_buffers(page);
 		do {
-			if (buffer_unwritten(bh)) {
-				acceptable = 0;
+			if (buffer_unwritten(bh))
+				acceptable = (type == IOMAP_UNWRITTEN);
+			else if (buffer_delay(bh))
+				acceptable = (type == IOMAP_DELAY);
+			else if (buffer_mapped(bh))
+				acceptable = (type == 0);
+			else
 				break;
-			} else if (buffer_delay(bh)) {
-				acceptable = 1;
-			}
 		} while ((bh = bh->b_this_page) != head);
 
 		if (acceptable)
-			return page;
-	}
-
-out:
-	unlock_page(page);
-	return NULL;
-}
-
-STATIC int
-xfs_map_unwritten(
-	struct inode		*inode,
-	struct page		*start_page,
-	struct buffer_head	*head,
-	struct buffer_head	*curr,
-	unsigned long		p_offset,
-	int			block_bits,
-	xfs_iomap_t		*iomapp,
-	struct writeback_control *wbc,
-	int			startio,
-	int			all_bh)
-{
-	struct buffer_head	*bh = curr;
-	xfs_iomap_t		*tmp;
-	xfs_ioend_t		*ioend;
-	loff_t			offset;
-	unsigned long		nblocks = 0;
-
-	offset = start_page->index;
-	offset <<= PAGE_CACHE_SHIFT;
-	offset += p_offset;
-
-	ioend = xfs_alloc_ioend(inode);
-
-	/* First map forwards in the page consecutive buffers
-	 * covering this unwritten extent
-	 */
-	do {
-		if (!buffer_unwritten(bh))
-			break;
-		tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
-		if (!tmp)
-			break;
-		xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
-		set_buffer_unwritten_io(bh);
-		bh->b_private = ioend;
-		p_offset += bh->b_size;
-		nblocks++;
-	} while ((bh = bh->b_this_page) != head);
-
-	atomic_add(nblocks, &ioend->io_remaining);
-
-	/* If we reached the end of the page, map forwards in any
-	 * following pages which are also covered by this extent.
-	 */
-	if (bh == head) {
-		struct address_space	*mapping = inode->i_mapping;
-		pgoff_t			tindex, tloff, tlast;
-		unsigned long		bs;
-		unsigned int		pg_offset, bbits = inode->i_blkbits;
-		struct page		*page;
-
-		tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-		tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-		tloff = min(tlast, tloff);
-		for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
-			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, ioend,
-						PAGE_CACHE_SIZE, &bs, bbits);
-			if (!page)
-				break;
-			nblocks += bs;
-			atomic_add(bs, &ioend->io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, ioend,
-							startio, all_bh);
-			/* stop if converting the next page might add
-			 * enough blocks that the corresponding byte
-			 * count won't fit in our ulong page buf length */
-			if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-				goto enough;
-		}
-
-		if (tindex == tlast &&
-		    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
-			page = xfs_probe_unwritten_page(mapping,
-							tindex, iomapp, ioend,
-							pg_offset, &bs, bbits);
-			if (page) {
-				nblocks += bs;
-				atomic_add(bs, &ioend->io_remaining);
-				xfs_convert_page(inode, page, iomapp, wbc, ioend,
-							startio, all_bh);
-				if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
-					goto enough;
-			}
-		}
+			return 1;
 	}
 
-enough:
-	ioend->io_size = (xfs_off_t)nblocks << block_bits;
-	ioend->io_offset = offset;
-	xfs_finish_ioend(ioend);
 	return 0;
 }
 
-STATIC void
-xfs_submit_page(
-	struct page		*page,
-	struct writeback_control *wbc,
-	struct buffer_head	*bh_arr[],
-	int			bh_count,
-	int			probed_page,
-	int			clear_dirty)
-{
-	struct buffer_head	*bh;
-	int			i;
-
-	BUG_ON(PageWriteback(page));
-	if (bh_count)
-		set_page_writeback(page);
-	if (clear_dirty)
-		clear_page_dirty(page);
-	unlock_page(page);
-
-	if (bh_count) {
-		for (i = 0; i < bh_count; i++) {
-			bh = bh_arr[i];
-			mark_buffer_async_write(bh);
-			if (buffer_unwritten(bh))
-				set_buffer_unwritten_io(bh);
-			set_buffer_uptodate(bh);
-			clear_buffer_dirty(bh);
-		}
-
-		for (i = 0; i < bh_count; i++)
-			submit_bh(WRITE, bh_arr[i]);
-
-		if (probed_page && clear_dirty)
-			wbc->nr_to_write--;	/* Wrote an "extra" page */
-	}
-}
-
 /*
  * Allocate & map buffers for page given the extent map. Write it out.
  * except for the original page of a writepage, this is called on
  * delalloc/unwritten pages only, for the original page it is possible
  * that the page has no mapping at all.
  */
-STATIC void
+STATIC int
 xfs_convert_page(
 	struct inode		*inode,
 	struct page		*page,
-	xfs_iomap_t		*iomapp,
+	loff_t			tindex,
+	xfs_iomap_t		*mp,
+	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
-	void			*private,
 	int			startio,
 	int			all_bh)
 {
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
-	xfs_iomap_t		*mp = iomapp, *tmp;
-	unsigned long		offset, end_offset;
-	int			index = 0;
+	struct buffer_head	*bh, *head;
+	xfs_off_t		end_offset;
+	unsigned long		p_offset;
+	unsigned int		type;
 	int			bbits = inode->i_blkbits;
 	int			len, page_dirty;
+	int			count = 0, done = 0, uptodate = 1;
+	xfs_off_t		offset = page_offset(page);
 
-	end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+	if (page->index != tindex)
+		goto fail;
+	if (TestSetPageLocked(page))
+		goto fail;
+	if (PageWriteback(page))
+		goto fail_unlock_page;
+	if (page->mapping != inode->i_mapping)
+		goto fail_unlock_page;
+	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+		goto fail_unlock_page;
 
 	/*
 	 * page_dirty is initially a count of buffers on the page before
 	 * EOF and is decrememted as we move each into a cleanable state.
+	 *
+	 * Derivation:
+	 *
+	 * End offset is the highest offset that this page should represent.
+	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+	 * hence give us the correct page_dirty count. On any other page,
+	 * it will be zero and in that case we need page_dirty to be the
+	 * count of buffers on the page.
 	 */
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+			i_size_read(inode));
+
 	len = 1 << inode->i_blkbits;
-	end_offset = max(end_offset, PAGE_CACHE_SIZE);
-	end_offset = roundup(end_offset, len);
-	page_dirty = end_offset / len;
+	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+					PAGE_CACHE_SIZE);
+	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+	page_dirty = p_offset / len;
 
-	offset = 0;
 	bh = head = page_buffers(page);
 	do {
 		if (offset >= end_offset)
 			break;
-		if (!(PageUptodate(page) || buffer_uptodate(bh)))
+		if (!buffer_uptodate(bh))
+			uptodate = 0;
+		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+			done = 1;
 			continue;
-		if (buffer_mapped(bh) && all_bh &&
-		    !(buffer_unwritten(bh) || buffer_delay(bh))) {
+		}
+
+		if (buffer_unwritten(bh) || buffer_delay(bh)) {
+			if (buffer_unwritten(bh))
+				type = IOMAP_UNWRITTEN;
+			else
+				type = IOMAP_DELAY;
+
+			if (!xfs_iomap_valid(mp, offset)) {
+				done = 1;
+				continue;
+			}
+
+			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+
+			xfs_map_at_offset(bh, offset, bbits, mp);
 			if (startio) {
+				xfs_add_to_ioend(inode, bh, offset,
+						type, ioendp, done);
+			} else {
+				set_buffer_dirty(bh);
+				unlock_buffer(bh);
+				mark_buffer_dirty(bh);
+			}
+			page_dirty--;
+			count++;
+		} else {
+			type = 0;
+			if (buffer_mapped(bh) && all_bh && startio) {
 				lock_buffer(bh);
-				bh_arr[index++] = bh;
+				xfs_add_to_ioend(inode, bh, offset,
+						type, ioendp, done);
+				count++;
 				page_dirty--;
+			} else {
+				done = 1;
 			}
-			continue;
 		}
-		tmp = xfs_offset_to_map(page, mp, offset);
-		if (!tmp)
-			continue;
-		ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
-		ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
+	} while (offset += len, (bh = bh->b_this_page) != head);
 
-		/* If this is a new unwritten extent buffer (i.e. one
-		 * that we haven't passed in private data for, we must
-		 * now map this buffer too.
-		 */
-		if (buffer_unwritten(bh) && !bh->b_end_io) {
-			ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
-			xfs_map_unwritten(inode, page, head, bh, offset,
-					bbits, tmp, wbc, startio, all_bh);
-		} else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
-			xfs_map_at_offset(page, bh, offset, bbits, tmp);
-			if (buffer_unwritten(bh)) {
-				set_buffer_unwritten_io(bh);
-				bh->b_private = private;
-				ASSERT(private);
+	if (uptodate && bh == head)
+		SetPageUptodate(page);
+
+	if (startio) {
+		if (count) {
+			struct backing_dev_info *bdi;
+
+			bdi = inode->i_mapping->backing_dev_info;
+			if (bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			} else if (--wbc->nr_to_write <= 0) {
+				done = 1;
 			}
 		}
-		if (startio) {
-			bh_arr[index++] = bh;
-		} else {
-			set_buffer_dirty(bh);
-			unlock_buffer(bh);
-			mark_buffer_dirty(bh);
-		}
-		page_dirty--;
-	} while (offset += len, (bh = bh->b_this_page) != head);
-
-	if (startio && index) {
-		xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
-	} else {
-		unlock_page(page);
+		xfs_start_page_writeback(page, wbc, !page_dirty, count);
 	}
+
+	return done;
+ fail_unlock_page:
+	unlock_page(page);
+ fail:
+	return 1;
 }
 
 /*
@@ -685,19 +750,31 @@ xfs_cluster_write(
 	struct inode		*inode,
 	pgoff_t			tindex,
 	xfs_iomap_t		*iomapp,
+	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
 	int			all_bh,
 	pgoff_t			tlast)
 {
-	struct page		*page;
+	struct pagevec		pvec;
+	int			done = 0, i;
 
-	for (; tindex <= tlast; tindex++) {
-		page = xfs_probe_delalloc_page(inode, tindex);
-		if (!page)
+	pagevec_init(&pvec, 0);
+	while (!done && tindex <= tlast) {
+		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
 			break;
-		xfs_convert_page(inode, page, iomapp, wbc, NULL,
-				startio, all_bh);
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+					iomapp, ioendp, wbc, startio, all_bh);
+			if (done)
+				break;
+		}
+
+		pagevec_release(&pvec);
+		cond_resched();
 	}
 }
 
@@ -728,18 +805,22 @@ xfs_page_state_convert(
 	int		startio,
 	int		unmapped) /* also implies page uptodate */
 {
-	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
-	xfs_iomap_t		*iomp, iomap;
+	struct buffer_head	*bh, *head;
+	xfs_iomap_t		iomap;
+	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
 	unsigned long		p_offset = 0;
+	unsigned int		type;
 	__uint64_t		end_offset;
 	pgoff_t			end_index, last_index, tlast;
-	int			len, err, i, cnt = 0, uptodate = 1;
-	int			flags;
-	int			page_dirty;
+	ssize_t			size, len;
+	int			flags, err, iomap_valid = 0, uptodate = 1;
+	int			page_dirty, count = 0, trylock_flag = 0;
+	int			all_bh = unmapped;
 
 	/* wait for other IO threads? */
-	flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+	if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
+		trylock_flag |= BMAPI_TRYLOCK;
 
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
@@ -754,161 +835,173 @@ xfs_page_state_convert(
 		}
 	}
 
-	end_offset = min_t(unsigned long long,
-			(loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
-	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-
 	/*
 	 * page_dirty is initially a count of buffers on the page before
 	 * EOF and is decrememted as we move each into a cleanable state.
-	 */
+	 *
+	 * Derivation:
+	 *
+	 * End offset is the highest offset that this page should represent.
+	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+	 * hence give us the correct page_dirty count. On any other page,
+	 * it will be zero and in that case we need page_dirty to be the
+	 * count of buffers on the page.
+	 */
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
 	len = 1 << inode->i_blkbits;
-	p_offset = max(p_offset, PAGE_CACHE_SIZE);
-	p_offset = roundup(p_offset, len);
+	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+					PAGE_CACHE_SIZE);
+	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
 	page_dirty = p_offset / len;
 
-	iomp = NULL;
-	p_offset = 0;
 	bh = head = page_buffers(page);
+	offset = page_offset(page);
+	flags = -1;
+	type = 0;
+
+	/* TODO: cleanup count and page_dirty */
 
 	do {
 		if (offset >= end_offset)
 			break;
 		if (!buffer_uptodate(bh))
 			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+			/*
+			 * the iomap is actually still valid, but the ioend
+			 * isn't.  shouldn't happen too often.
+			 */
+			iomap_valid = 0;
 			continue;
-
-		if (iomp) {
-			iomp = xfs_offset_to_map(page, &iomap, p_offset);
 		}
 
+		if (iomap_valid)
+			iomap_valid = xfs_iomap_valid(&iomap, offset);
+
 		/*
 		 * First case, map an unwritten extent and prepare for
 		 * extent state conversion transaction on completion.
-		 */
-		if (buffer_unwritten(bh)) {
-			if (!startio)
-				continue;
-			if (!iomp) {
-				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_WRITE|BMAPI_IGNSTATE);
-				if (err) {
-					goto error;
-				}
-				iomp = xfs_offset_to_map(page, &iomap,
-								p_offset);
+		 *
+		 * Second case, allocate space for a delalloc buffer.
+		 * We can return EAGAIN here in the release page case.
+		 *
+		 * Third case, an unmapped buffer was found, and we are
+		 * in a path where we need to write the whole page out.
+		 */
+		if (buffer_unwritten(bh) || buffer_delay(bh) ||
+		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
+		     !buffer_mapped(bh) && (unmapped || startio))) {
+			/*
+			 * Make sure we don't use a read-only iomap
+			 */
+			if (flags == BMAPI_READ)
+				iomap_valid = 0;
+
+			if (buffer_unwritten(bh)) {
+				type = IOMAP_UNWRITTEN;
+				flags = BMAPI_WRITE|BMAPI_IGNSTATE;
+			} else if (buffer_delay(bh)) {
+				type = IOMAP_DELAY;
+				flags = BMAPI_ALLOCATE;
+				if (!startio)
+					flags |= trylock_flag;
+			} else {
+				type = IOMAP_NEW;
+				flags = BMAPI_WRITE|BMAPI_MMAP;
 			}
-			if (iomp) {
-				if (!bh->b_end_io) {
-					err = xfs_map_unwritten(inode, page,
-							head, bh, p_offset,
-							inode->i_blkbits, iomp,
-							wbc, startio, unmapped);
-					if (err) {
-						goto error;
-					}
-				} else {
-					set_bit(BH_Lock, &bh->b_state);
-				}
-				BUG_ON(!buffer_locked(bh));
-				bh_arr[cnt++] = bh;
-				page_dirty--;
-			}
-		/*
-		 * Second case, allocate space for a delalloc buffer.
-		 * We can return EAGAIN here in the release page case.
-		 */
-		} else if (buffer_delay(bh)) {
-			if (!iomp) {
-				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_ALLOCATE | flags);
-				if (err) {
+
+			if (!iomap_valid) {
+				if (type == IOMAP_NEW) {
+					size = xfs_probe_cluster(inode,
+							page, bh, head, 0);
+				} else {
+					size = len;
+				}
+
+				err = xfs_map_blocks(inode, offset, size,
+						&iomap, flags);
+				if (err)
 					goto error;
-				}
-				iomp = xfs_offset_to_map(page, &iomap,
-								p_offset);
+				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
-			if (iomp) {
-				xfs_map_at_offset(page, bh, p_offset,
-						inode->i_blkbits, iomp);
+			if (iomap_valid) {
+				xfs_map_at_offset(bh, offset,
+						inode->i_blkbits, &iomap);
 				if (startio) {
-					bh_arr[cnt++] = bh;
+					xfs_add_to_ioend(inode, bh, offset,
+							type, &ioend,
+							!iomap_valid);
 				} else {
 					set_buffer_dirty(bh);
 					unlock_buffer(bh);
 					mark_buffer_dirty(bh);
 				}
 				page_dirty--;
+				count++;
+			}
+		} else if (buffer_uptodate(bh) && startio) {
+			/*
+			 * we got here because the buffer is already mapped.
+			 * That means it must already have extents allocated
+			 * underneath it. Map the extent by reading it.
+			 */
+			if (!iomap_valid || type != 0) {
+				flags = BMAPI_READ;
+				size = xfs_probe_cluster(inode, page, bh,
+								head, 1);
+				err = xfs_map_blocks(inode, offset, size,
+						&iomap, flags);
+				if (err)
+					goto error;
+				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
-		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-			   (unmapped || startio)) {
 
-			if (!buffer_mapped(bh)) {
-				int	size;
-
-				/*
-				 * Getting here implies an unmapped buffer
-				 * was found, and we are in a path where we
-				 * need to write the whole page out.
-				 */
-				if (!iomp) {
-					size = xfs_probe_unmapped_cluster(
-							inode, page, bh, head);
-					err = xfs_map_blocks(inode, offset,
-							size, &iomap,
-							BMAPI_WRITE|BMAPI_MMAP);
-					if (err) {
-						goto error;
-					}
-					iomp = xfs_offset_to_map(page, &iomap,
-								     p_offset);
-				}
-				if (iomp) {
-					xfs_map_at_offset(page,
-							bh, p_offset,
-							inode->i_blkbits, iomp);
-					if (startio) {
-						bh_arr[cnt++] = bh;
-					} else {
-						set_buffer_dirty(bh);
-						unlock_buffer(bh);
-						mark_buffer_dirty(bh);
-					}
-					page_dirty--;
-				}
-			} else if (startio) {
-				if (buffer_uptodate(bh) &&
-				    !test_and_set_bit(BH_Lock, &bh->b_state)) {
-					bh_arr[cnt++] = bh;
-					page_dirty--;
-				}
+			type = 0;
+			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+				ASSERT(buffer_mapped(bh));
+				if (iomap_valid)
+					all_bh = 1;
+				xfs_add_to_ioend(inode, bh, offset, type,
+						&ioend, !iomap_valid);
+				page_dirty--;
+				count++;
+			} else {
+				iomap_valid = 0;
 			}
+		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+			   (unmapped || startio)) {
+			iomap_valid = 0;
 		}
-	} while (offset += len, p_offset += len,
-		 ((bh = bh->b_this_page) != head));
+
+		if (!iohead)
+			iohead = ioend;
+
+	} while (offset += len, ((bh = bh->b_this_page) != head));
 
 	if (uptodate && bh == head)
 		SetPageUptodate(page);
 
-	if (startio) {
-		xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
-	}
+	if (startio)
+		xfs_start_page_writeback(page, wbc, 1, count);
 
-	if (iomp) {
-		offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+	if (ioend && iomap_valid) {
+		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
 					PAGE_CACHE_SHIFT;
 		tlast = min_t(pgoff_t, offset, last_index);
-		xfs_cluster_write(inode, page->index + 1, iomp, wbc,
-					startio, unmapped, tlast);
+		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
+					wbc, startio, all_bh, tlast);
 	}
 
+	if (iohead)
+		xfs_submit_ioend(iohead);
+
 	return page_dirty;
 
 error:
-	for (i = 0; i < cnt; i++) {
-		unlock_buffer(bh_arr[i]);
-	}
+	if (iohead)
+		xfs_cancel_ioend(iohead);
 
 	/*
 	 * If it's delalloc and we have nowhere to put it,
@@ -916,9 +1009,8 @@ error:
 	 * us to try again.
 	 */
 	if (err != -EAGAIN) {
-		if (!unmapped) {
+		if (!unmapped)
 			block_invalidatepage(page, 0);
-		}
 		ClearPageUptodate(page);
 	}
 	return err;
@@ -982,7 +1074,7 @@ __linvfs_get_block(
 	}
 
 	/* If this is a realtime file, data might be on a new device */
-	bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
 
 	/* If we previously allocated a block out beyond eof and
 	 * we are now coming back to use it then we will need to
@@ -1094,10 +1186,10 @@ linvfs_direct_IO(
 	if (error)
 		return -error;
 
-	iocb->private = xfs_alloc_ioend(inode);
+	iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
 
 	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-		iomap.iomap_target->pbr_bdev,
+		iomap.iomap_target->bt_bdev,
 		iov, offset, nr_segs,
 		linvfs_get_blocks_direct,
 		linvfs_end_io_direct);
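The xfs_aops.c changes above retire the per-buffer unwritten-extent completion path (linvfs_unwritten_done) in favour of typed ioends: xfs_add_to_ioend() chains buffer_heads onto an ioend through b_private and chains ioends through io_list, and xfs_submit_ioend() later walks both lists to build large contiguous bios. A minimal user-space sketch of just that chaining pattern follows; the struct and function names in it are illustrative stand-ins, not the kernel API:

	#include <stdio.h>
	#include <stdlib.h>

	struct buf {
		int		blocknr;
		struct buf	*private;	/* plays the role of bh->b_private */
	};

	struct ioend {
		struct buf	*head;		/* like io_buffer_head */
		struct buf	*tail;		/* like io_buffer_tail */
		size_t		size;		/* like io_size */
	};

	/* Append one buffer, mirroring the append branch of xfs_add_to_ioend(). */
	static void ioend_add(struct ioend *io, struct buf *b)
	{
		b->private = NULL;
		if (!io->head)
			io->head = b;
		else
			io->tail->private = b;	/* link through the private pointer */
		io->tail = b;
		io->size += 512;		/* assumed toy buffer size */
	}

	/* Walk and release the chain, as xfs_destroy_ioend() walks b_private. */
	static void ioend_destroy(struct ioend *io)
	{
		struct buf *b, *next;

		for (b = io->head; b; b = next) {
			next = b->private;	/* save link before freeing */
			printf("completing block %d\n", b->blocknr);
			free(b);
		}
	}

	int main(void)
	{
		struct ioend io = { NULL, NULL, 0 };
		int i;

		for (i = 0; i < 4; i++) {
			struct buf *b = calloc(1, sizeof(*b));
			b->blocknr = i;
			ioend_add(&io, b);
		}
		ioend_destroy(&io);
		return 0;
	}

The design point this illustrates: reusing the buffers' own private pointers avoids any allocation when growing the chain, which matters on the writeback path where memory may be scarce.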
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4720758a9ade..55339dd5a30d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
 
 typedef void (*xfs_ioend_func_t)(void *);
 
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
 typedef struct xfs_ioend {
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
+	unsigned int		io_type;	/* delalloc / unwritten */
 	unsigned int		io_uptodate;	/* I/O status register */
 	atomic_t		io_remaining;	/* hold count */
 	struct vnode		*io_vnode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
 
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+
 #endif	/* __XFS_IOPS_H__ */
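Both new copies of the "Derivation" comment in xfs_aops.c above (in xfs_convert_page() and xfs_page_state_convert()) compute page_dirty from the low bits of end_offset. A small stand-alone sketch of that arithmetic, using assumed toy constants (4k pages, 1k blocks) in place of the kernel's PAGE_CACHE_SIZE machinery:

	#include <stdio.h>

	#define PAGE_SIZE	4096UL
	#define BLOCK_SIZE	1024UL

	static unsigned long page_dirty_count(unsigned long long isize,
					      unsigned long index)
	{
		unsigned long long page_end =
			(unsigned long long)(index + 1) * PAGE_SIZE;
		/* end_offset: highest file offset this page represents */
		unsigned long long end_offset = page_end < isize ? page_end : isize;
		unsigned long p_offset = end_offset & (PAGE_SIZE - 1);

		/*
		 * Non-zero only on the last page, where it is the byte count
		 * within the page; round it up to whole blocks.  Zero means
		 * an interior (full) page, so the whole page counts.
		 */
		if (p_offset)
			p_offset = ((p_offset + BLOCK_SIZE - 1) / BLOCK_SIZE)
					* BLOCK_SIZE;
		else
			p_offset = PAGE_SIZE;
		return p_offset / BLOCK_SIZE;
	}

	int main(void)
	{
		/* a 9000-byte file: pages 0,1 are full; page 2 holds 808 bytes */
		printf("%lu\n", page_dirty_count(9000, 0));	/* 4 blocks */
		printf("%lu\n", page_dirty_count(9000, 2));	/* 1 block */
		return 0;
	}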
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..e44b7c1a3a36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
 #include <linux/kthread.h>
 #include "xfs_linux.h"
 
-STATIC kmem_cache_t *pagebuf_zone;
-STATIC kmem_shaker_t pagebuf_shake;
+STATIC kmem_zone_t *xfs_buf_zone;
+STATIC kmem_shaker_t xfs_buf_shake;
+STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
-STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
 
-#ifdef PAGEBUF_TRACE
+#ifdef XFS_BUF_TRACE
 void
-pagebuf_trace(
-	xfs_buf_t	*pb,
+xfs_buf_trace(
+	xfs_buf_t	*bp,
 	char		*id,
 	void		*data,
 	void		*ra)
 {
-	ktrace_enter(pagebuf_trace_buf,
-		pb, id,
-		(void *)(unsigned long)pb->pb_flags,
-		(void *)(unsigned long)pb->pb_hold.counter,
-		(void *)(unsigned long)pb->pb_sema.count.counter,
+	ktrace_enter(xfs_buf_trace_buf,
+		bp, id,
+		(void *)(unsigned long)bp->b_flags,
+		(void *)(unsigned long)bp->b_hold.counter,
+		(void *)(unsigned long)bp->b_sema.count.counter,
 		(void *)current,
 		data, ra,
-		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
-		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
-		(void *)(unsigned long)pb->pb_buffer_length,
+		(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
+		(void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
+		(void *)(unsigned long)bp->b_buffer_length,
 		NULL, NULL, NULL, NULL, NULL);
 }
-ktrace_t *pagebuf_trace_buf;
-#define PAGEBUF_TRACE_SIZE	4096
-#define PB_TRACE(pb, id, data)	\
-	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+ktrace_t *xfs_buf_trace_buf;
+#define XFS_BUF_TRACE_SIZE	4096
+#define XB_TRACE(bp, id, data)	\
+	xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
 #else
-#define PB_TRACE(pb, id, data)	do { } while (0)
+#define XB_TRACE(bp, id, data)	do { } while (0)
 #endif
 
-#ifdef PAGEBUF_LOCK_TRACKING
-# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)
-# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)
-# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
 #else
-# define PB_SET_OWNER(pb)	do { } while (0)
-# define PB_CLEAR_OWNER(pb)	do { } while (0)
-# define PB_GET_OWNER(pb)	do { } while (0)
+# define XB_SET_OWNER(bp)	do { } while (0)
+# define XB_CLEAR_OWNER(bp)	do { } while (0)
+# define XB_GET_OWNER(bp)	do { } while (0)
 #endif
 
-#define pb_to_gfp(flags) \
-	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
-	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+#define xb_to_gfp(flags) \
+	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
 
-#define pb_to_km(flags) \
-	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+#define xb_to_km(flags) \
+	 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
 
-#define pagebuf_allocate(flags) \
-	kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
-#define pagebuf_deallocate(pb) \
-	kmem_zone_free(pagebuf_zone, (pb));
+#define xfs_buf_allocate(flags) \
+	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+	kmem_zone_free(xfs_buf_zone, (bp));
 
 /*
  * Page Region interfaces.
  *
  * For pages in filesystems where the blocksize is smaller than the
  * pagesize, we use the page->private field (long) to hold a bitmap
  * of uptodate regions within the page.
  *
  * Each such region is "bytes per page / bits per long" bytes long.
 *
 * NBPPR == number-of-bytes-per-page-region
 * BTOPR == bytes-to-page-region (rounded up)
 * BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
 #if (BITS_PER_LONG == 32)
 #define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
@@ -159,7 +160,7 @@ test_page_region(
 }
 
 /*
- * Mapping of multi-page buffers into contiguous virtual space
+ * Mapping of multi-page buffers into contiguous virtual space
  */
 
 typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int as_list_len;
 STATIC DEFINE_SPINLOCK(as_lock);
 
 /*
- * Try to batch vunmaps because they are costly.
+ * Try to batch vunmaps because they are costly.
  */
 STATIC void
 free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
 }
 
 /*
- *	Internal pagebuf object manipulation
+ *	Internal xfs_buf_t object manipulation
  */
 
 STATIC void
-_pagebuf_initialize(
-	xfs_buf_t		*pb,
+_xfs_buf_initialize(
+	xfs_buf_t		*bp,
 	xfs_buftarg_t		*target,
-	loff_t			range_base,
+	xfs_off_t		range_base,
 	size_t			range_length,
-	page_buf_flags_t	flags)
+	xfs_buf_flags_t		flags)
 {
 	/*
-	 * We don't want certain flags to appear in pb->pb_flags.
+	 * We don't want certain flags to appear in b_flags.
 	 */
-	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
+	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
 
-	memset(pb, 0, sizeof(xfs_buf_t));
-	atomic_set(&pb->pb_hold, 1);
-	init_MUTEX_LOCKED(&pb->pb_iodonesema);
-	INIT_LIST_HEAD(&pb->pb_list);
-	INIT_LIST_HEAD(&pb->pb_hash_list);
-	init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
-	PB_SET_OWNER(pb);
-	pb->pb_target = target;
-	pb->pb_file_offset = range_base;
+	memset(bp, 0, sizeof(xfs_buf_t));
+	atomic_set(&bp->b_hold, 1);
+	init_MUTEX_LOCKED(&bp->b_iodonesema);
+	INIT_LIST_HEAD(&bp->b_list);
+	INIT_LIST_HEAD(&bp->b_hash_list);
+	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+	XB_SET_OWNER(bp);
+	bp->b_target = target;
+	bp->b_file_offset = range_base;
 	/*
 	 * Set buffer_length and count_desired to the same value initially.
 	 * I/O routines should use count_desired, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	pb->pb_buffer_length = pb->pb_count_desired = range_length;
-	pb->pb_flags = flags;
-	pb->pb_bn = XFS_BUF_DADDR_NULL;
-	atomic_set(&pb->pb_pin_count, 0);
-	init_waitqueue_head(&pb->pb_waiters);
+	bp->b_buffer_length = bp->b_count_desired = range_length;
+	bp->b_flags = flags;
+	bp->b_bn = XFS_BUF_DADDR_NULL;
+	atomic_set(&bp->b_pin_count, 0);
+	init_waitqueue_head(&bp->b_waiters);
 
-	XFS_STATS_INC(pb_create);
-	PB_TRACE(pb, "initialize", target);
+	XFS_STATS_INC(xb_create);
+	XB_TRACE(bp, "initialize", target);
 }
 
 /*
  *	Allocate a page array capable of holding a specified number
  *	of pages, and point the page buf at it.
  */
 STATIC int
-_pagebuf_get_pages(
-	xfs_buf_t		*pb,
+_xfs_buf_get_pages(
+	xfs_buf_t		*bp,
 	int			page_count,
-	page_buf_flags_t	flags)
+	xfs_buf_flags_t		flags)
 {
 	/* Make sure that we have a page list */
-	if (pb->pb_pages == NULL) {
-		pb->pb_offset = page_buf_poff(pb->pb_file_offset);
-		pb->pb_page_count = page_count;
-		if (page_count <= PB_PAGES) {
-			pb->pb_pages = pb->pb_page_array;
+	if (bp->b_pages == NULL) {
+		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+		bp->b_page_count = page_count;
+		if (page_count <= XB_PAGES) {
+			bp->b_pages = bp->b_page_array;
 		} else {
-			pb->pb_pages = kmem_alloc(sizeof(struct page *) *
-					page_count, pb_to_km(flags));
-			if (pb->pb_pages == NULL)
+			bp->b_pages = kmem_alloc(sizeof(struct page *) *
+					page_count, xb_to_km(flags));
+			if (bp->b_pages == NULL)
 				return -ENOMEM;
 		}
-		memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
 	}
 	return 0;
 }
 
 /*
- *	Frees pb_pages if it was malloced.
+ *	Frees b_pages if it was allocated.
  */
 STATIC void
-_pagebuf_free_pages(
+_xfs_buf_free_pages(
 	xfs_buf_t	*bp)
 {
-	if (bp->pb_pages != bp->pb_page_array) {
-		kmem_free(bp->pb_pages,
-			  bp->pb_page_count * sizeof(struct page *));
+	if (bp->b_pages != bp->b_page_array) {
+		kmem_free(bp->b_pages,
+			  bp->b_page_count * sizeof(struct page *));
 	}
 }
 
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
299 * Releases the specified buffer. 300 * Releases the specified buffer.
300 * 301 *
301 * The modification state of any associated pages is left unchanged. 302 * The modification state of any associated pages is left unchanged.
302 * The buffer must not be on any hash - use pagebuf_rele instead for 303 * The buffer must not be on any hash - use xfs_buf_rele instead for
303 * hashed and refcounted buffers 304 * hashed and refcounted buffers
304 */ 305 */
305void 306void
306pagebuf_free( 307xfs_buf_free(
307 xfs_buf_t *bp) 308 xfs_buf_t *bp)
308{ 309{
309 PB_TRACE(bp, "free", 0); 310 XB_TRACE(bp, "free", 0);
310 311
311 ASSERT(list_empty(&bp->pb_hash_list)); 312 ASSERT(list_empty(&bp->b_hash_list));
312 313
313 if (bp->pb_flags & _PBF_PAGE_CACHE) { 314 if (bp->b_flags & _XBF_PAGE_CACHE) {
314 uint i; 315 uint i;
315 316
316 if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) 317 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
317 free_address(bp->pb_addr - bp->pb_offset); 318 free_address(bp->b_addr - bp->b_offset);
318 319
319 for (i = 0; i < bp->pb_page_count; i++) 320 for (i = 0; i < bp->b_page_count; i++)
320 page_cache_release(bp->pb_pages[i]); 321 page_cache_release(bp->b_pages[i]);
321 _pagebuf_free_pages(bp); 322 _xfs_buf_free_pages(bp);
322 } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { 323 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
323 /* 324 /*
324 * XXX(hch): bp->pb_count_desired might be incorrect (see 325 * XXX(hch): bp->b_count_desired might be incorrect (see
325 * pagebuf_associate_memory for details), but fortunately 326 * xfs_buf_associate_memory for details), but fortunately
326 * the Linux version of kmem_free ignores the len argument.. 327 * the Linux version of kmem_free ignores the len argument..
327 */ 328 */
328 kmem_free(bp->pb_addr, bp->pb_count_desired); 329 kmem_free(bp->b_addr, bp->b_count_desired);
329 _pagebuf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } 331 }
331 332
332 pagebuf_deallocate(bp); 333 xfs_buf_deallocate(bp);
333} 334}
334 335
335/* 336/*
336 * Finds all pages for the buffer in question and builds its page list. 337 * Finds all pages for the buffer in question and builds its page list.
337 */ 338 */
338STATIC int 339STATIC int
339_pagebuf_lookup_pages( 340_xfs_buf_lookup_pages(
340 xfs_buf_t *bp, 341 xfs_buf_t *bp,
341 uint flags) 342 uint flags)
342{ 343{
343 struct address_space *mapping = bp->pb_target->pbr_mapping; 344 struct address_space *mapping = bp->b_target->bt_mapping;
344 size_t blocksize = bp->pb_target->pbr_bsize; 345 size_t blocksize = bp->b_target->bt_bsize;
345 size_t size = bp->pb_count_desired; 346 size_t size = bp->b_count_desired;
346 size_t nbytes, offset; 347 size_t nbytes, offset;
347 gfp_t gfp_mask = pb_to_gfp(flags); 348 gfp_t gfp_mask = xb_to_gfp(flags);
348 unsigned short page_count, i; 349 unsigned short page_count, i;
349 pgoff_t first; 350 pgoff_t first;
350 loff_t end; 351 xfs_off_t end;
351 int error; 352 int error;
352 353
353 end = bp->pb_file_offset + bp->pb_buffer_length; 354 end = bp->b_file_offset + bp->b_buffer_length;
354 page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); 355 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
355 356
356 error = _pagebuf_get_pages(bp, page_count, flags); 357 error = _xfs_buf_get_pages(bp, page_count, flags);
357 if (unlikely(error)) 358 if (unlikely(error))
358 return error; 359 return error;
359 bp->pb_flags |= _PBF_PAGE_CACHE; 360 bp->b_flags |= _XBF_PAGE_CACHE;
360 361
361 offset = bp->pb_offset; 362 offset = bp->b_offset;
362 first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; 363 first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
363 364
364 for (i = 0; i < bp->pb_page_count; i++) { 365 for (i = 0; i < bp->b_page_count; i++) {
365 struct page *page; 366 struct page *page;
366 uint retries = 0; 367 uint retries = 0;
367 368
368 retry: 369 retry:
369 page = find_or_create_page(mapping, first + i, gfp_mask); 370 page = find_or_create_page(mapping, first + i, gfp_mask);
370 if (unlikely(page == NULL)) { 371 if (unlikely(page == NULL)) {
371 if (flags & PBF_READ_AHEAD) { 372 if (flags & XBF_READ_AHEAD) {
372 bp->pb_page_count = i; 373 bp->b_page_count = i;
373 for (i = 0; i < bp->pb_page_count; i++) 374 for (i = 0; i < bp->b_page_count; i++)
374 unlock_page(bp->pb_pages[i]); 375 unlock_page(bp->b_pages[i]);
375 return -ENOMEM; 376 return -ENOMEM;
376 } 377 }
377 378
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
387 "deadlock in %s (mode:0x%x)\n", 388 "deadlock in %s (mode:0x%x)\n",
388 __FUNCTION__, gfp_mask); 389 __FUNCTION__, gfp_mask);
389 390
390 XFS_STATS_INC(pb_page_retries); 391 XFS_STATS_INC(xb_page_retries);
391 xfsbufd_wakeup(0, gfp_mask); 392 xfsbufd_wakeup(0, gfp_mask);
392 blk_congestion_wait(WRITE, HZ/50); 393 blk_congestion_wait(WRITE, HZ/50);
393 goto retry; 394 goto retry;
394 } 395 }
395 396
396 XFS_STATS_INC(pb_page_found); 397 XFS_STATS_INC(xb_page_found);
397 398
398 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 399 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
399 size -= nbytes; 400 size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
401 if (!PageUptodate(page)) { 402 if (!PageUptodate(page)) {
402 page_count--; 403 page_count--;
403 if (blocksize >= PAGE_CACHE_SIZE) { 404 if (blocksize >= PAGE_CACHE_SIZE) {
404 if (flags & PBF_READ) 405 if (flags & XBF_READ)
405 bp->pb_locked = 1; 406 bp->b_locked = 1;
406 } else if (!PagePrivate(page)) { 407 } else if (!PagePrivate(page)) {
407 if (test_page_region(page, offset, nbytes)) 408 if (test_page_region(page, offset, nbytes))
408 page_count++; 409 page_count++;
409 } 410 }
410 } 411 }
411 412
412 bp->pb_pages[i] = page; 413 bp->b_pages[i] = page;
413 offset = 0; 414 offset = 0;
414 } 415 }
415 416
416 if (!bp->pb_locked) { 417 if (!bp->b_locked) {
417 for (i = 0; i < bp->pb_page_count; i++) 418 for (i = 0; i < bp->b_page_count; i++)
418 unlock_page(bp->pb_pages[i]); 419 unlock_page(bp->b_pages[i]);
419 } 420 }
420 421
421 if (page_count == bp->pb_page_count) 422 if (page_count == bp->b_page_count)
422 bp->pb_flags |= PBF_DONE; 423 bp->b_flags |= XBF_DONE;
423 424
424 PB_TRACE(bp, "lookup_pages", (long)page_count); 425 XB_TRACE(bp, "lookup_pages", (long)page_count);
425 return error; 426 return error;
426} 427}
427 428
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
429 * Map buffer into kernel address-space if necessary. 430 * Map buffer into kernel address-space if necessary.
430 */ 431 */
431STATIC int 432STATIC int
432_pagebuf_map_pages( 433_xfs_buf_map_pages(
433 xfs_buf_t *bp, 434 xfs_buf_t *bp,
434 uint flags) 435 uint flags)
435{ 436{
436 /* A single page buffer is always mappable */ 437 /* A single page buffer is always mappable */
437 if (bp->pb_page_count == 1) { 438 if (bp->b_page_count == 1) {
438 bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; 439 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
439 bp->pb_flags |= PBF_MAPPED; 440 bp->b_flags |= XBF_MAPPED;
440 } else if (flags & PBF_MAPPED) { 441 } else if (flags & XBF_MAPPED) {
441 if (as_list_len > 64) 442 if (as_list_len > 64)
442 purge_addresses(); 443 purge_addresses();
443 bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, 444 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
444 VM_MAP, PAGE_KERNEL); 445 VM_MAP, PAGE_KERNEL);
445 if (unlikely(bp->pb_addr == NULL)) 446 if (unlikely(bp->b_addr == NULL))
446 return -ENOMEM; 447 return -ENOMEM;
447 bp->pb_addr += bp->pb_offset; 448 bp->b_addr += bp->b_offset;
448 bp->pb_flags |= PBF_MAPPED; 449 bp->b_flags |= XBF_MAPPED;
449 } 450 }
450 451
451 return 0; 452 return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
456 */ 457 */
457 458
458/* 459/*
459 * _pagebuf_find 460 * Looks up, and creates if absent, a lockable buffer for
460 *
461 * Looks up, and creates if absent, a lockable buffer for
462 * a given range of an inode. The buffer is returned 461 * a given range of an inode. The buffer is returned
463 * locked. If other overlapping buffers exist, they are 462 * locked. If other overlapping buffers exist, they are
464 * released before the new buffer is created and locked, 463 * released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
466 * are unlocked. No I/O is implied by this call. 465 * are unlocked. No I/O is implied by this call.
467 */ 466 */
468xfs_buf_t * 467xfs_buf_t *
469_pagebuf_find( 468_xfs_buf_find(
470 xfs_buftarg_t *btp, /* block device target */ 469 xfs_buftarg_t *btp, /* block device target */
471 loff_t ioff, /* starting offset of range */ 470 xfs_off_t ioff, /* starting offset of range */
472 size_t isize, /* length of range */ 471 size_t isize, /* length of range */
473 page_buf_flags_t flags, /* PBF_TRYLOCK */ 472 xfs_buf_flags_t flags,
474 xfs_buf_t *new_pb)/* newly allocated buffer */ 473 xfs_buf_t *new_bp)
475{ 474{
476 loff_t range_base; 475 xfs_off_t range_base;
477 size_t range_length; 476 size_t range_length;
478 xfs_bufhash_t *hash; 477 xfs_bufhash_t *hash;
479 xfs_buf_t *pb, *n; 478 xfs_buf_t *bp, *n;
480 479
481 range_base = (ioff << BBSHIFT); 480 range_base = (ioff << BBSHIFT);
482 range_length = (isize << BBSHIFT); 481 range_length = (isize << BBSHIFT);
483 482
484 /* Check for IOs smaller than the sector size / not sector aligned */ 483 /* Check for IOs smaller than the sector size / not sector aligned */
485 ASSERT(!(range_length < (1 << btp->pbr_sshift))); 484 ASSERT(!(range_length < (1 << btp->bt_sshift)));
486 ASSERT(!(range_base & (loff_t)btp->pbr_smask)); 485 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
487 486
488 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; 487 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
489 488
490 spin_lock(&hash->bh_lock); 489 spin_lock(&hash->bh_lock);
491 490
492 list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { 491 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
493 ASSERT(btp == pb->pb_target); 492 ASSERT(btp == bp->b_target);
494 if (pb->pb_file_offset == range_base && 493 if (bp->b_file_offset == range_base &&
495 pb->pb_buffer_length == range_length) { 494 bp->b_buffer_length == range_length) {
496 /* 495 /*
497 * If we look at something bring it to the 496 * If we look at something, bring it to the
498 * front of the list for next time. 497 * front of the list for next time.
499 */ 498 */
500 atomic_inc(&pb->pb_hold); 499 atomic_inc(&bp->b_hold);
501 list_move(&pb->pb_hash_list, &hash->bh_list); 500 list_move(&bp->b_hash_list, &hash->bh_list);
502 goto found; 501 goto found;
503 } 502 }
504 } 503 }
505 504
506 /* No match found */ 505 /* No match found */
507 if (new_pb) { 506 if (new_bp) {
508 _pagebuf_initialize(new_pb, btp, range_base, 507 _xfs_buf_initialize(new_bp, btp, range_base,
509 range_length, flags); 508 range_length, flags);
510 new_pb->pb_hash = hash; 509 new_bp->b_hash = hash;
511 list_add(&new_pb->pb_hash_list, &hash->bh_list); 510 list_add(&new_bp->b_hash_list, &hash->bh_list);
512 } else { 511 } else {
513 XFS_STATS_INC(pb_miss_locked); 512 XFS_STATS_INC(xb_miss_locked);
514 } 513 }
515 514
516 spin_unlock(&hash->bh_lock); 515 spin_unlock(&hash->bh_lock);
517 return new_pb; 516 return new_bp;
518 517
519found: 518found:
520 spin_unlock(&hash->bh_lock); 519 spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
523 * if this does not work then we need to drop the 522 * if this does not work then we need to drop the
524 * spinlock and do a hard attempt on the semaphore. 523 * spinlock and do a hard attempt on the semaphore.
525 */ 524 */
526 if (down_trylock(&pb->pb_sema)) { 525 if (down_trylock(&bp->b_sema)) {
527 if (!(flags & PBF_TRYLOCK)) { 526 if (!(flags & XBF_TRYLOCK)) {
528 /* wait for buffer ownership */ 527 /* wait for buffer ownership */
529 PB_TRACE(pb, "get_lock", 0); 528 XB_TRACE(bp, "get_lock", 0);
530 pagebuf_lock(pb); 529 xfs_buf_lock(bp);
531 XFS_STATS_INC(pb_get_locked_waited); 530 XFS_STATS_INC(xb_get_locked_waited);
532 } else { 531 } else {
533 /* We asked for a trylock and failed, no need 532 /* We asked for a trylock and failed, no need
534 * to look at file offset and length here, we 533 * to look at file offset and length here, we
535 * know that this pagebuf at least overlaps our 534 * know that this buffer at least overlaps our
536 * pagebuf and is locked, therefore our buffer 535 * buffer and is locked, therefore our buffer
537 * either does not exist, or is this buffer 536 * either does not exist, or is this buffer.
538 */ 537 */
539 538 xfs_buf_rele(bp);
540 pagebuf_rele(pb); 539 XFS_STATS_INC(xb_busy_locked);
541 XFS_STATS_INC(pb_busy_locked); 540 return NULL;
542 return (NULL);
543 } 541 }
544 } else { 542 } else {
545 /* trylock worked */ 543 /* trylock worked */
546 PB_SET_OWNER(pb); 544 XB_SET_OWNER(bp);
547 } 545 }
548 546
549 if (pb->pb_flags & PBF_STALE) { 547 if (bp->b_flags & XBF_STALE) {
550 ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); 548 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
551 pb->pb_flags &= PBF_MAPPED; 549 bp->b_flags &= XBF_MAPPED;
552 } 550 }
553 PB_TRACE(pb, "got_lock", 0); 551 XB_TRACE(bp, "got_lock", 0);
554 XFS_STATS_INC(pb_get_locked); 552 XFS_STATS_INC(xb_get_locked);
555 return (pb); 553 return bp;
556} 554}
557 555
558/* 556/*
559 * xfs_buf_get_flags assembles a buffer covering the specified range. 557 * Assembles a buffer covering the specified range.
560 *
561 * Storage in memory for all portions of the buffer will be allocated, 558 * Storage in memory for all portions of the buffer will be allocated,
562 * although backing storage may not be. 559 * although backing storage may not be.
563 */ 560 */
564xfs_buf_t * 561xfs_buf_t *
565xfs_buf_get_flags( /* allocate a buffer */ 562xfs_buf_get_flags(
566 xfs_buftarg_t *target,/* target for buffer */ 563 xfs_buftarg_t *target,/* target for buffer */
567 loff_t ioff, /* starting offset of range */ 564 xfs_off_t ioff, /* starting offset of range */
568 size_t isize, /* length of range */ 565 size_t isize, /* length of range */
569 page_buf_flags_t flags) /* PBF_TRYLOCK */ 566 xfs_buf_flags_t flags)
570{ 567{
571 xfs_buf_t *pb, *new_pb; 568 xfs_buf_t *bp, *new_bp;
572 int error = 0, i; 569 int error = 0, i;
573 570
574 new_pb = pagebuf_allocate(flags); 571 new_bp = xfs_buf_allocate(flags);
575 if (unlikely(!new_pb)) 572 if (unlikely(!new_bp))
576 return NULL; 573 return NULL;
577 574
578 pb = _pagebuf_find(target, ioff, isize, flags, new_pb); 575 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
579 if (pb == new_pb) { 576 if (bp == new_bp) {
580 error = _pagebuf_lookup_pages(pb, flags); 577 error = _xfs_buf_lookup_pages(bp, flags);
581 if (error) 578 if (error)
582 goto no_buffer; 579 goto no_buffer;
583 } else { 580 } else {
584 pagebuf_deallocate(new_pb); 581 xfs_buf_deallocate(new_bp);
585 if (unlikely(pb == NULL)) 582 if (unlikely(bp == NULL))
586 return NULL; 583 return NULL;
587 } 584 }
588 585
589 for (i = 0; i < pb->pb_page_count; i++) 586 for (i = 0; i < bp->b_page_count; i++)
590 mark_page_accessed(pb->pb_pages[i]); 587 mark_page_accessed(bp->b_pages[i]);
591 588
592 if (!(pb->pb_flags & PBF_MAPPED)) { 589 if (!(bp->b_flags & XBF_MAPPED)) {
593 error = _pagebuf_map_pages(pb, flags); 590 error = _xfs_buf_map_pages(bp, flags);
594 if (unlikely(error)) { 591 if (unlikely(error)) {
595 printk(KERN_WARNING "%s: failed to map pages\n", 592 printk(KERN_WARNING "%s: failed to map pages\n",
596 __FUNCTION__); 593 __FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags( /* allocate a buffer */
598 } 595 }
599 } 596 }
600 597
601 XFS_STATS_INC(pb_get); 598 XFS_STATS_INC(xb_get);
602 599
603 /* 600 /*
604 * Always fill in the block number now, the mapped cases can do 601 * Always fill in the block number now, the mapped cases can do
605 * their own overlay of this later. 602 * their own overlay of this later.
606 */ 603 */
607 pb->pb_bn = ioff; 604 bp->b_bn = ioff;
608 pb->pb_count_desired = pb->pb_buffer_length; 605 bp->b_count_desired = bp->b_buffer_length;
609 606
610 PB_TRACE(pb, "get", (unsigned long)flags); 607 XB_TRACE(bp, "get", (unsigned long)flags);
611 return pb; 608 return bp;
612 609
613 no_buffer: 610 no_buffer:
614 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 611 if (flags & (XBF_LOCK | XBF_TRYLOCK))
615 pagebuf_unlock(pb); 612 xfs_buf_unlock(bp);
616 pagebuf_rele(pb); 613 xfs_buf_rele(bp);
617 return NULL; 614 return NULL;
618} 615}
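A minimal sketch of a caller using the renamed interface to obtain a locked, mapped buffer and release it again. mp and blkno are assumed context, the 8-basic-block length is arbitrary, and xfs_buf_relse() (unlock plus release) is assumed from xfs_buf.h:

	xfs_buf_t	*bp;

	/* ioff and isize are in 512-byte basic blocks (shifted by BBSHIFT) */
	bp = xfs_buf_get_flags(mp->m_ddev_targp, blkno, 8,
			       XBF_LOCK | XBF_MAPPED);
	if (!bp)
		return ENOMEM;
	memset(bp->b_addr, 0, bp->b_count_desired);	/* mapped, so b_addr is valid */
	xfs_buf_relse(bp);				/* unlock and drop the hold */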
619 616
620xfs_buf_t * 617xfs_buf_t *
621xfs_buf_read_flags( 618xfs_buf_read_flags(
622 xfs_buftarg_t *target, 619 xfs_buftarg_t *target,
623 loff_t ioff, 620 xfs_off_t ioff,
624 size_t isize, 621 size_t isize,
625 page_buf_flags_t flags) 622 xfs_buf_flags_t flags)
626{ 623{
627 xfs_buf_t *pb; 624 xfs_buf_t *bp;
628 625
629 flags |= PBF_READ; 626 flags |= XBF_READ;
630 627
631 pb = xfs_buf_get_flags(target, ioff, isize, flags); 628 bp = xfs_buf_get_flags(target, ioff, isize, flags);
632 if (pb) { 629 if (bp) {
633 if (!XFS_BUF_ISDONE(pb)) { 630 if (!XFS_BUF_ISDONE(bp)) {
634 PB_TRACE(pb, "read", (unsigned long)flags); 631 XB_TRACE(bp, "read", (unsigned long)flags);
635 XFS_STATS_INC(pb_get_read); 632 XFS_STATS_INC(xb_get_read);
636 pagebuf_iostart(pb, flags); 633 xfs_buf_iostart(bp, flags);
637 } else if (flags & PBF_ASYNC) { 634 } else if (flags & XBF_ASYNC) {
638 PB_TRACE(pb, "read_async", (unsigned long)flags); 635 XB_TRACE(bp, "read_async", (unsigned long)flags);
639 /* 636 /*
640 * Read ahead call which is already satisfied, 637 * Read ahead call which is already satisfied,
641 * drop the buffer 638 * drop the buffer
642 */ 639 */
643 goto no_buffer; 640 goto no_buffer;
644 } else { 641 } else {
645 PB_TRACE(pb, "read_done", (unsigned long)flags); 642 XB_TRACE(bp, "read_done", (unsigned long)flags);
646 /* We do not want read in the flags */ 643 /* We do not want read in the flags */
647 pb->pb_flags &= ~PBF_READ; 644 bp->b_flags &= ~XBF_READ;
648 } 645 }
649 } 646 }
650 647
651 return pb; 648 return bp;
652 649
653 no_buffer: 650 no_buffer:
654 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 651 if (flags & (XBF_LOCK | XBF_TRYLOCK))
655 pagebuf_unlock(pb); 652 xfs_buf_unlock(bp);
656 pagebuf_rele(pb); 653 xfs_buf_rele(bp);
657 return NULL; 654 return NULL;
658} 655}
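A sketch of a synchronous read through this path; without XBF_ASYNC, the xfs_buf_iostart() call inside waits for completion, so b_error is final on return (mp, blkno and error are assumed context; positive errnos per XFS convention):

	bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, 1,
				XBF_LOCK | XBF_MAPPED);
	if (!bp)
		return ENOMEM;
	if (bp->b_error) {			/* I/O failed, buffer still held */
		error = bp->b_error;
		xfs_buf_relse(bp);
		return error;
	}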
659 656
660/* 657/*
661 * If we are not low on memory then do the readahead in a deadlock 658 * If we are not low on memory then do the readahead in a deadlock
662 * safe manner. 659 * safe manner.
663 */ 660 */
664void 661void
665pagebuf_readahead( 662xfs_buf_readahead(
666 xfs_buftarg_t *target, 663 xfs_buftarg_t *target,
667 loff_t ioff, 664 xfs_off_t ioff,
668 size_t isize, 665 size_t isize,
669 page_buf_flags_t flags) 666 xfs_buf_flags_t flags)
670{ 667{
671 struct backing_dev_info *bdi; 668 struct backing_dev_info *bdi;
672 669
673 bdi = target->pbr_mapping->backing_dev_info; 670 bdi = target->bt_mapping->backing_dev_info;
674 if (bdi_read_congested(bdi)) 671 if (bdi_read_congested(bdi))
675 return; 672 return;
676 673
677 flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); 674 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
678 xfs_buf_read_flags(target, ioff, isize, flags); 675 xfs_buf_read_flags(target, ioff, isize, flags);
679} 676}
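Callers treat this as fire and forget; the trylock/async/readahead flags are ORed in internally and congested devices are skipped, so a sketch of typical use is a single call (next_blkno is hypothetical):

	/* best effort: silently dropped if the backing device is congested */
	xfs_buf_readahead(mp->m_ddev_targp, next_blkno, 1, 0);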
680 677
681xfs_buf_t * 678xfs_buf_t *
682pagebuf_get_empty( 679xfs_buf_get_empty(
683 size_t len, 680 size_t len,
684 xfs_buftarg_t *target) 681 xfs_buftarg_t *target)
685{ 682{
686 xfs_buf_t *pb; 683 xfs_buf_t *bp;
687 684
688 pb = pagebuf_allocate(0); 685 bp = xfs_buf_allocate(0);
689 if (pb) 686 if (bp)
690 _pagebuf_initialize(pb, target, 0, len, 0); 687 _xfs_buf_initialize(bp, target, 0, len, 0);
691 return pb; 688 return bp;
692} 689}
693 690
694static inline struct page * 691static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
704} 701}
705 702
706int 703int
707pagebuf_associate_memory( 704xfs_buf_associate_memory(
708 xfs_buf_t *pb, 705 xfs_buf_t *bp,
709 void *mem, 706 void *mem,
710 size_t len) 707 size_t len)
711{ 708{
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
722 page_count++; 719 page_count++;
723 720
724 /* Free any previous set of page pointers */ 721 /* Free any previous set of page pointers */
725 if (pb->pb_pages) 722 if (bp->b_pages)
726 _pagebuf_free_pages(pb); 723 _xfs_buf_free_pages(bp);
727 724
728 pb->pb_pages = NULL; 725 bp->b_pages = NULL;
729 pb->pb_addr = mem; 726 bp->b_addr = mem;
730 727
731 rval = _pagebuf_get_pages(pb, page_count, 0); 728 rval = _xfs_buf_get_pages(bp, page_count, 0);
732 if (rval) 729 if (rval)
733 return rval; 730 return rval;
734 731
735 pb->pb_offset = offset; 732 bp->b_offset = offset;
736 ptr = (size_t) mem & PAGE_CACHE_MASK; 733 ptr = (size_t) mem & PAGE_CACHE_MASK;
737 end = PAGE_CACHE_ALIGN((size_t) mem + len); 734 end = PAGE_CACHE_ALIGN((size_t) mem + len);
738 end_cur = end; 735 end_cur = end;
739 /* set up first page */ 736 /* set up first page */
740 pb->pb_pages[0] = mem_to_page(mem); 737 bp->b_pages[0] = mem_to_page(mem);
741 738
742 ptr += PAGE_CACHE_SIZE; 739 ptr += PAGE_CACHE_SIZE;
743 pb->pb_page_count = ++i; 740 bp->b_page_count = ++i;
744 while (ptr < end) { 741 while (ptr < end) {
745 pb->pb_pages[i] = mem_to_page((void *)ptr); 742 bp->b_pages[i] = mem_to_page((void *)ptr);
746 pb->pb_page_count = ++i; 743 bp->b_page_count = ++i;
747 ptr += PAGE_CACHE_SIZE; 744 ptr += PAGE_CACHE_SIZE;
748 } 745 }
749 pb->pb_locked = 0; 746 bp->b_locked = 0;
750 747
751 pb->pb_count_desired = pb->pb_buffer_length = len; 748 bp->b_count_desired = bp->b_buffer_length = len;
752 pb->pb_flags |= PBF_MAPPED; 749 bp->b_flags |= XBF_MAPPED;
753 750
754 return 0; 751 return 0;
755} 752}
756 753
757xfs_buf_t * 754xfs_buf_t *
758pagebuf_get_no_daddr( 755xfs_buf_get_noaddr(
759 size_t len, 756 size_t len,
760 xfs_buftarg_t *target) 757 xfs_buftarg_t *target)
761{ 758{
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
764 void *data; 761 void *data;
765 int error; 762 int error;
766 763
767 bp = pagebuf_allocate(0); 764 bp = xfs_buf_allocate(0);
768 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
769 goto fail; 766 goto fail;
770 _pagebuf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
771 768
772 try_again: 769 try_again:
773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); 770 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
776 773
777 /* check whether alignment matches.. */ 774 /* check whether alignment matches.. */
778 if ((__psunsigned_t)data != 775 if ((__psunsigned_t)data !=
779 ((__psunsigned_t)data & ~target->pbr_smask)) { 776 ((__psunsigned_t)data & ~target->bt_smask)) {
780 /* .. else double the size and try again */ 777 /* .. else double the size and try again */
781 kmem_free(data, malloc_len); 778 kmem_free(data, malloc_len);
782 malloc_len <<= 1; 779 malloc_len <<= 1;
783 goto try_again; 780 goto try_again;
784 } 781 }
785 782
786 error = pagebuf_associate_memory(bp, data, len); 783 error = xfs_buf_associate_memory(bp, data, len);
787 if (error) 784 if (error)
788 goto fail_free_mem; 785 goto fail_free_mem;
789 bp->pb_flags |= _PBF_KMEM_ALLOC; 786 bp->b_flags |= _XBF_KMEM_ALLOC;
790 787
791 pagebuf_unlock(bp); 788 xfs_buf_unlock(bp);
792 789
793 PB_TRACE(bp, "no_daddr", data); 790 XB_TRACE(bp, "no_daddr", data);
794 return bp; 791 return bp;
795 fail_free_mem: 792 fail_free_mem:
796 kmem_free(data, malloc_len); 793 kmem_free(data, malloc_len);
797 fail_free_buf: 794 fail_free_buf:
798 pagebuf_free(bp); 795 xfs_buf_free(bp);
799 fail: 796 fail:
800 return NULL; 797 return NULL;
801} 798}
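Buffers from this path never enter a hash list, so they are torn down with xfs_buf_free() rather than xfs_buf_rele(); a sketch of log-recovery style usage, with len and the log target assumed context:

	bp = xfs_buf_get_noaddr(len, mp->m_logdev_targp);
	if (!bp)
		return ENOMEM;
	/* ... fill or read data through bp->b_addr ... */
	xfs_buf_free(bp);		/* unhashed, so not xfs_buf_rele() */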
802 799
803/* 800/*
804 * pagebuf_hold
805 *
806 * Increment reference count on buffer, to hold the buffer concurrently 801 * Increment reference count on buffer, to hold the buffer concurrently
807 * with another thread which may release (free) the buffer asynchronously. 802 * with another thread which may release (free) the buffer asynchronously.
808 *
809 * Must hold the buffer already to call this function. 803 * Must hold the buffer already to call this function.
810 */ 804 */
811void 805void
812pagebuf_hold( 806xfs_buf_hold(
813 xfs_buf_t *pb) 807 xfs_buf_t *bp)
814{ 808{
815 atomic_inc(&pb->pb_hold); 809 atomic_inc(&bp->b_hold);
816 PB_TRACE(pb, "hold", 0); 810 XB_TRACE(bp, "hold", 0);
817} 811}
818 812
819/* 813/*
820 * pagebuf_rele 814 * Releases a hold on the specified buffer. If the
821 * 815 * the hold count is 1, calls xfs_buf_free.
822 * pagebuf_rele releases a hold on the specified buffer. If the
823 * the hold count is 1, pagebuf_rele calls pagebuf_free.
824 */ 816 */
825void 817void
826pagebuf_rele( 818xfs_buf_rele(
827 xfs_buf_t *pb) 819 xfs_buf_t *bp)
828{ 820{
829 xfs_bufhash_t *hash = pb->pb_hash; 821 xfs_bufhash_t *hash = bp->b_hash;
830 822
831 PB_TRACE(pb, "rele", pb->pb_relse); 823 XB_TRACE(bp, "rele", bp->b_relse);
832 824
833 if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { 825 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
834 if (pb->pb_relse) { 826 if (bp->b_relse) {
835 atomic_inc(&pb->pb_hold); 827 atomic_inc(&bp->b_hold);
836 spin_unlock(&hash->bh_lock); 828 spin_unlock(&hash->bh_lock);
837 (*(pb->pb_relse)) (pb); 829 (*(bp->b_relse)) (bp);
838 } else if (pb->pb_flags & PBF_FS_MANAGED) { 830 } else if (bp->b_flags & XBF_FS_MANAGED) {
839 spin_unlock(&hash->bh_lock); 831 spin_unlock(&hash->bh_lock);
840 } else { 832 } else {
841 ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q))); 833 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
842 list_del_init(&pb->pb_hash_list); 834 list_del_init(&bp->b_hash_list);
843 spin_unlock(&hash->bh_lock); 835 spin_unlock(&hash->bh_lock);
844 pagebuf_free(pb); 836 xfs_buf_free(bp);
845 } 837 }
846 } else { 838 } else {
847 /* 839 /*
848 * Catch reference count leaks 840 * Catch reference count leaks
849 */ 841 */
850 ASSERT(atomic_read(&pb->pb_hold) >= 0); 842 ASSERT(atomic_read(&bp->b_hold) >= 0);
851 } 843 }
852} 844}
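A sketch of the hold discipline these two functions implement: a context that already owns a reference takes an extra hold before handing the buffer to another context, and each side drops only its own (hand_off_buffer() is hypothetical):

	xfs_buf_hold(bp);		/* caller must already hold bp */
	hand_off_buffer(bp);		/* other context does xfs_buf_rele() */
	/* ... */
	xfs_buf_rele(bp);		/* drop our own hold */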
853 845
@@ -863,168 +855,122 @@ pagebuf_rele(
863 */ 855 */
864 856
865/* 857/*
866 * pagebuf_cond_lock 858 * Locks a buffer object, if it is not already locked.
867 * 859 * Note that this in no way locks the underlying pages, so it is only
868 * pagebuf_cond_lock locks a buffer object, if it is not already locked. 860 * useful for synchronizing concurrent use of buffer objects, not for
869 * Note that this in no way 861 * synchronizing independent access to the underlying pages.
870 * locks the underlying pages, so it is only useful for synchronizing
871 * concurrent use of page buffer objects, not for synchronizing independent
872 * access to the underlying pages.
873 */ 862 */
874int 863int
875pagebuf_cond_lock( /* lock buffer, if not locked */ 864xfs_buf_cond_lock(
876 /* returns -EBUSY if locked) */ 865 xfs_buf_t *bp)
877 xfs_buf_t *pb)
878{ 866{
879 int locked; 867 int locked;
880 868
881 locked = down_trylock(&pb->pb_sema) == 0; 869 locked = down_trylock(&bp->b_sema) == 0;
882 if (locked) { 870 if (locked) {
883 PB_SET_OWNER(pb); 871 XB_SET_OWNER(bp);
884 } 872 }
885 PB_TRACE(pb, "cond_lock", (long)locked); 873 XB_TRACE(bp, "cond_lock", (long)locked);
886 return(locked ? 0 : -EBUSY); 874 return locked ? 0 : -EBUSY;
887} 875}
888 876
889#if defined(DEBUG) || defined(XFS_BLI_TRACE) 877#if defined(DEBUG) || defined(XFS_BLI_TRACE)
890/*
891 * pagebuf_lock_value
892 *
893 * Return lock value for a pagebuf
894 */
895int 878int
896pagebuf_lock_value( 879xfs_buf_lock_value(
897 xfs_buf_t *pb) 880 xfs_buf_t *bp)
898{ 881{
899 return(atomic_read(&pb->pb_sema.count)); 882 return atomic_read(&bp->b_sema.count);
900} 883}
901#endif 884#endif
902 885
903/* 886/*
904 * pagebuf_lock 887 * Locks a buffer object.
905 * 888 * Note that this in no way locks the underlying pages, so it is only
906 * pagebuf_lock locks a buffer object. Note that this in no way 889 * useful for synchronizing concurrent use of buffer objects, not for
907 * locks the underlying pages, so it is only useful for synchronizing 890 * synchronizing independent access to the underlying pages.
908 * concurrent use of page buffer objects, not for synchronizing independent
909 * access to the underlying pages.
910 */ 891 */
911int 892void
912pagebuf_lock( 893xfs_buf_lock(
913 xfs_buf_t *pb) 894 xfs_buf_t *bp)
914{ 895{
915 PB_TRACE(pb, "lock", 0); 896 XB_TRACE(bp, "lock", 0);
916 if (atomic_read(&pb->pb_io_remaining)) 897 if (atomic_read(&bp->b_io_remaining))
917 blk_run_address_space(pb->pb_target->pbr_mapping); 898 blk_run_address_space(bp->b_target->bt_mapping);
918 down(&pb->pb_sema); 899 down(&bp->b_sema);
919 PB_SET_OWNER(pb); 900 XB_SET_OWNER(bp);
920 PB_TRACE(pb, "locked", 0); 901 XB_TRACE(bp, "locked", 0);
921 return 0;
922} 902}
923 903
924/* 904/*
925 * pagebuf_unlock 905 * Releases the lock on the buffer object.
926 *
927 * pagebuf_unlock releases the lock on the buffer object created by
928 * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
929 * created by pagebuf_pin).
930 *
931 * If the buffer is marked delwri but is not queued, do so before we 906 * If the buffer is marked delwri but is not queued, do so before we
932 * unlock the buffer as we need to set flags correctly. We also need to 907 * unlock the buffer as we need to set flags correctly. We also need to
933 * take a reference for the delwri queue because the unlocker is going to 908 * take a reference for the delwri queue because the unlocker is going to
934 * drop theirs and they don't know we just queued it. 909 * drop theirs and they don't know we just queued it.
935 */ 910 */
936void 911void
937pagebuf_unlock( /* unlock buffer */ 912xfs_buf_unlock(
938 xfs_buf_t *pb) /* buffer to unlock */ 913 xfs_buf_t *bp)
939{ 914{
940 if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { 915 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
941 atomic_inc(&pb->pb_hold); 916 atomic_inc(&bp->b_hold);
942 pb->pb_flags |= PBF_ASYNC; 917 bp->b_flags |= XBF_ASYNC;
943 pagebuf_delwri_queue(pb, 0); 918 xfs_buf_delwri_queue(bp, 0);
944 } 919 }
945 920
946 PB_CLEAR_OWNER(pb); 921 XB_CLEAR_OWNER(bp);
947 up(&pb->pb_sema); 922 up(&bp->b_sema);
948 PB_TRACE(pb, "unlock", 0); 923 XB_TRACE(bp, "unlock", 0);
949} 924}
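A sketch of the lock/unlock pairing, probing with the non-blocking variant first; note again that this serializes buffer users only, not access to the underlying pages:

	if (xfs_buf_cond_lock(bp)) {	/* -EBUSY: someone else owns it */
		xfs_buf_lock(bp);	/* block until the owner is done */
	}
	/* ... modify the buffer ... */
	xfs_buf_unlock(bp);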
950 925
951 926
952/* 927/*
953 * Pinning Buffer Storage in Memory 928 * Pinning Buffer Storage in Memory
954 */ 929 * Ensure that no attempt to force a buffer to disk will succeed.
955
956/*
957 * pagebuf_pin
958 *
959 * pagebuf_pin locks all of the memory represented by a buffer in
960 * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
961 * the same or different buffers affecting a given page, will
962 * properly count the number of outstanding "pin" requests. The
963 * buffer may be released after the pagebuf_pin and a different
964 * buffer used when calling pagebuf_unpin, if desired.
965 * pagebuf_pin should be used by the file system when it wants to be
966 * assured that no attempt will be made to force the affected
967 * memory to disk. It does not assure that a given logical page
968 * will not be moved to a different physical page.
969 */ 930 */
970void 931void
971pagebuf_pin( 932xfs_buf_pin(
972 xfs_buf_t *pb) 933 xfs_buf_t *bp)
973{ 934{
974 atomic_inc(&pb->pb_pin_count); 935 atomic_inc(&bp->b_pin_count);
975 PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); 936 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
976} 937}
977 938
978/*
979 * pagebuf_unpin
980 *
981 * pagebuf_unpin reverses the locking of memory performed by
982 * pagebuf_pin. Note that both functions affected the logical
983 * pages associated with the buffer, not the buffer itself.
984 */
985void 939void
986pagebuf_unpin( 940xfs_buf_unpin(
987 xfs_buf_t *pb) 941 xfs_buf_t *bp)
988{ 942{
989 if (atomic_dec_and_test(&pb->pb_pin_count)) { 943 if (atomic_dec_and_test(&bp->b_pin_count))
990 wake_up_all(&pb->pb_waiters); 944 wake_up_all(&bp->b_waiters);
991 } 945 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
992 PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
993} 946}
994 947
995int 948int
996pagebuf_ispin( 949xfs_buf_ispin(
997 xfs_buf_t *pb) 950 xfs_buf_t *bp)
998{ 951{
999 return atomic_read(&pb->pb_pin_count); 952 return atomic_read(&bp->b_pin_count);
1000} 953}
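A sketch of how the pin count gates writeback: while pinned (e.g. the only copy of a change is still in the log), any attempt to write the buffer stalls in xfs_buf_wait_unpin() below until the final unpin wakes b_waiters:

	xfs_buf_pin(bp);		/* change exists only in the log */
	ASSERT(xfs_buf_ispin(bp));	/* writers now block in wait_unpin */
	/* ... log I/O completes ... */
	xfs_buf_unpin(bp);		/* count hits 0, wakes b_waiters */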
1001 954
1002/* 955STATIC void
1003 * pagebuf_wait_unpin 956xfs_buf_wait_unpin(
1004 * 957 xfs_buf_t *bp)
1005 * pagebuf_wait_unpin waits until all of the memory associated
1006 * with the buffer is no longer locked in memory. It returns
1007 * immediately if none of the affected pages are locked.
1008 */
1009static inline void
1010_pagebuf_wait_unpin(
1011 xfs_buf_t *pb)
1012{ 958{
1013 DECLARE_WAITQUEUE (wait, current); 959 DECLARE_WAITQUEUE (wait, current);
1014 960
1015 if (atomic_read(&pb->pb_pin_count) == 0) 961 if (atomic_read(&bp->b_pin_count) == 0)
1016 return; 962 return;
1017 963
1018 add_wait_queue(&pb->pb_waiters, &wait); 964 add_wait_queue(&bp->b_waiters, &wait);
1019 for (;;) { 965 for (;;) {
1020 set_current_state(TASK_UNINTERRUPTIBLE); 966 set_current_state(TASK_UNINTERRUPTIBLE);
1021 if (atomic_read(&pb->pb_pin_count) == 0) 967 if (atomic_read(&bp->b_pin_count) == 0)
1022 break; 968 break;
1023 if (atomic_read(&pb->pb_io_remaining)) 969 if (atomic_read(&bp->b_io_remaining))
1024 blk_run_address_space(pb->pb_target->pbr_mapping); 970 blk_run_address_space(bp->b_target->bt_mapping);
1025 schedule(); 971 schedule();
1026 } 972 }
1027 remove_wait_queue(&pb->pb_waiters, &wait); 973 remove_wait_queue(&bp->b_waiters, &wait);
1028 set_current_state(TASK_RUNNING); 974 set_current_state(TASK_RUNNING);
1029} 975}
1030 976
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
1032 * Buffer Utility Routines 978 * Buffer Utility Routines
1033 */ 979 */
1034 980
1035/*
1036 * pagebuf_iodone
1037 *
1038 * pagebuf_iodone marks a buffer for which I/O is in progress
1039 * done with respect to that I/O. The pb_iodone routine, if
1040 * present, will be called as a side-effect.
1041 */
1042STATIC void 981STATIC void
1043pagebuf_iodone_work( 982xfs_buf_iodone_work(
1044 void *v) 983 void *v)
1045{ 984{
1046 xfs_buf_t *bp = (xfs_buf_t *)v; 985 xfs_buf_t *bp = (xfs_buf_t *)v;
1047 986
1048 if (bp->pb_iodone) 987 if (bp->b_iodone)
1049 (*(bp->pb_iodone))(bp); 988 (*(bp->b_iodone))(bp);
1050 else if (bp->pb_flags & PBF_ASYNC) 989 else if (bp->b_flags & XBF_ASYNC)
1051 xfs_buf_relse(bp); 990 xfs_buf_relse(bp);
1052} 991}
1053 992
1054void 993void
1055pagebuf_iodone( 994xfs_buf_ioend(
1056 xfs_buf_t *pb, 995 xfs_buf_t *bp,
1057 int schedule) 996 int schedule)
1058{ 997{
1059 pb->pb_flags &= ~(PBF_READ | PBF_WRITE); 998 bp->b_flags &= ~(XBF_READ | XBF_WRITE);
1060 if (pb->pb_error == 0) 999 if (bp->b_error == 0)
1061 pb->pb_flags |= PBF_DONE; 1000 bp->b_flags |= XBF_DONE;
1062 1001
1063 PB_TRACE(pb, "iodone", pb->pb_iodone); 1002 XB_TRACE(bp, "iodone", bp->b_iodone);
1064 1003
1065 if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { 1004 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1066 if (schedule) { 1005 if (schedule) {
1067 INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); 1006 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
1068 queue_work(xfslogd_workqueue, &pb->pb_iodone_work); 1007 queue_work(xfslogd_workqueue, &bp->b_iodone_work);
1069 } else { 1008 } else {
1070 pagebuf_iodone_work(pb); 1009 xfs_buf_iodone_work(bp);
1071 } 1010 }
1072 } else { 1011 } else {
1073 up(&pb->pb_iodonesema); 1012 up(&bp->b_iodonesema);
1074 } 1013 }
1075} 1014}
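Completion fans out three ways here: a registered b_iodone handler, an implicit release for XBF_ASYNC buffers, or an up() that wakes a synchronous waiter in xfs_buf_iowait(). A sketch of installing a handler before issuing I/O (my_buf_iodone is hypothetical):

	STATIC void
	my_buf_iodone(
		xfs_buf_t	*bp)
	{
		/* with schedule != 0 this runs from xfslogd_workqueue */
		xfs_buf_relse(bp);
	}

	bp->b_iodone = my_buf_iodone;	/* called instead of waking iowait */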
1076 1015
1077/*
1078 * pagebuf_ioerror
1079 *
1080 * pagebuf_ioerror sets the error code for a buffer.
1081 */
1082void 1016void
1083pagebuf_ioerror( /* mark/clear buffer error flag */ 1017xfs_buf_ioerror(
1084 xfs_buf_t *pb, /* buffer to mark */ 1018 xfs_buf_t *bp,
1085 int error) /* error to store (0 if none) */ 1019 int error)
1086{ 1020{
1087 ASSERT(error >= 0 && error <= 0xffff); 1021 ASSERT(error >= 0 && error <= 0xffff);
1088 pb->pb_error = (unsigned short)error; 1022 bp->b_error = (unsigned short)error;
1089 PB_TRACE(pb, "ioerror", (unsigned long)error); 1023 XB_TRACE(bp, "ioerror", (unsigned long)error);
1090} 1024}
1091 1025
1092/* 1026/*
1093 * pagebuf_iostart 1027 * Initiate I/O on a buffer, based on the flags supplied.
1094 * 1028 * The b_iodone routine in the buffer supplied will only be called
1095 * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
1096 * If necessary, it will arrange for any disk space allocation required,
1097 * and it will break up the request if the block mappings require it.
1098 * The pb_iodone routine in the buffer supplied will only be called
1099 * when all of the subsidiary I/O requests, if any, have been completed. 1029 * when all of the subsidiary I/O requests, if any, have been completed.
1100 * pagebuf_iostart calls the pagebuf_ioinitiate routine or
1101 * pagebuf_iorequest, if the former routine is not defined, to start
1102 * the I/O on a given low-level request.
1103 */ 1030 */
1104int 1031int
1105pagebuf_iostart( /* start I/O on a buffer */ 1032xfs_buf_iostart(
1106 xfs_buf_t *pb, /* buffer to start */ 1033 xfs_buf_t *bp,
1107 page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ 1034 xfs_buf_flags_t flags)
1108 /* PBF_WRITE, PBF_DELWRI, */
1109 /* PBF_DONT_BLOCK */
1110{ 1035{
1111 int status = 0; 1036 int status = 0;
1112 1037
1113 PB_TRACE(pb, "iostart", (unsigned long)flags); 1038 XB_TRACE(bp, "iostart", (unsigned long)flags);
1114 1039
1115 if (flags & PBF_DELWRI) { 1040 if (flags & XBF_DELWRI) {
1116 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); 1041 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1117 pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); 1042 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1118 pagebuf_delwri_queue(pb, 1); 1043 xfs_buf_delwri_queue(bp, 1);
1119 return status; 1044 return status;
1120 } 1045 }
1121 1046
1122 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ 1047 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
1123 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1048 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1124 pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ 1049 bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
1125 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1050 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1126 1051
1127 BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); 1052 BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
1128 1053
1129 /* For writes allow an alternate strategy routine to precede 1054 /* For writes allow an alternate strategy routine to precede
1130 * the actual I/O request (which may not be issued at all in 1055 * the actual I/O request (which may not be issued at all in
1131 * a shutdown situation, for example). 1056 * a shutdown situation, for example).
1132 */ 1057 */
1133 status = (flags & PBF_WRITE) ? 1058 status = (flags & XBF_WRITE) ?
1134 pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); 1059 xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
1135 1060
1136 /* Wait for I/O if we are not an async request. 1061 /* Wait for I/O if we are not an async request.
1137 * Note: async I/O request completion will release the buffer, 1062 * Note: async I/O request completion will release the buffer,
1138 * and that can already be done by this point. So using the 1063 * and that can already be done by this point. So using the
1139 * buffer pointer from here on, after async I/O, is invalid. 1064 * buffer pointer from here on, after async I/O, is invalid.
1140 */ 1065 */
1141 if (!status && !(flags & PBF_ASYNC)) 1066 if (!status && !(flags & XBF_ASYNC))
1142 status = pagebuf_iowait(pb); 1067 status = xfs_buf_iowait(bp);
1143 1068
1144 return status; 1069 return status;
1145} 1070}
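A sketch of the three ways this entry point is driven; only the first returns a meaningful I/O status, since the others complete (or are merely queued) asynchronously:

	/* synchronous: no XBF_ASYNC, so iostart waits via xfs_buf_iowait() */
	error = xfs_buf_iostart(bp, XBF_WRITE);

	/* asynchronous write: returns once the request is issued */
	(void)xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC);

	/* delayed write: only queued via xfs_buf_delwri_queue(), no I/O yet */
	(void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);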
1146 1071
1147/*
1148 * Helper routine for pagebuf_iorequest
1149 */
1150
1151STATIC __inline__ int 1072STATIC __inline__ int
1152_pagebuf_iolocked( 1073_xfs_buf_iolocked(
1153 xfs_buf_t *pb) 1074 xfs_buf_t *bp)
1154{ 1075{
1155 ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); 1076 ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
1156 if (pb->pb_flags & PBF_READ) 1077 if (bp->b_flags & XBF_READ)
1157 return pb->pb_locked; 1078 return bp->b_locked;
1158 return 0; 1079 return 0;
1159} 1080}
1160 1081
1161STATIC __inline__ void 1082STATIC __inline__ void
1162_pagebuf_iodone( 1083_xfs_buf_ioend(
1163 xfs_buf_t *pb, 1084 xfs_buf_t *bp,
1164 int schedule) 1085 int schedule)
1165{ 1086{
1166 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 1087 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1167 pb->pb_locked = 0; 1088 bp->b_locked = 0;
1168 pagebuf_iodone(pb, schedule); 1089 xfs_buf_ioend(bp, schedule);
1169 } 1090 }
1170} 1091}
1171 1092
1172STATIC int 1093STATIC int
1173bio_end_io_pagebuf( 1094xfs_buf_bio_end_io(
1174 struct bio *bio, 1095 struct bio *bio,
1175 unsigned int bytes_done, 1096 unsigned int bytes_done,
1176 int error) 1097 int error)
1177{ 1098{
1178 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; 1099 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1179 unsigned int blocksize = pb->pb_target->pbr_bsize; 1100 unsigned int blocksize = bp->b_target->bt_bsize;
1180 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1101 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1181 1102
1182 if (bio->bi_size) 1103 if (bio->bi_size)
1183 return 1; 1104 return 1;
1184 1105
1185 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1106 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1186 pb->pb_error = EIO; 1107 bp->b_error = EIO;
1187 1108
1188 do { 1109 do {
1189 struct page *page = bvec->bv_page; 1110 struct page *page = bvec->bv_page;
1190 1111
1191 if (unlikely(pb->pb_error)) { 1112 if (unlikely(bp->b_error)) {
1192 if (pb->pb_flags & PBF_READ) 1113 if (bp->b_flags & XBF_READ)
1193 ClearPageUptodate(page); 1114 ClearPageUptodate(page);
1194 SetPageError(page); 1115 SetPageError(page);
1195 } else if (blocksize == PAGE_CACHE_SIZE) { 1116 } else if (blocksize >= PAGE_CACHE_SIZE) {
1196 SetPageUptodate(page); 1117 SetPageUptodate(page);
1197 } else if (!PagePrivate(page) && 1118 } else if (!PagePrivate(page) &&
1198 (pb->pb_flags & _PBF_PAGE_CACHE)) { 1119 (bp->b_flags & _XBF_PAGE_CACHE)) {
1199 set_page_region(page, bvec->bv_offset, bvec->bv_len); 1120 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1200 } 1121 }
1201 1122
1202 if (--bvec >= bio->bi_io_vec) 1123 if (--bvec >= bio->bi_io_vec)
1203 prefetchw(&bvec->bv_page->flags); 1124 prefetchw(&bvec->bv_page->flags);
1204 1125
1205 if (_pagebuf_iolocked(pb)) { 1126 if (_xfs_buf_iolocked(bp)) {
1206 unlock_page(page); 1127 unlock_page(page);
1207 } 1128 }
1208 } while (bvec >= bio->bi_io_vec); 1129 } while (bvec >= bio->bi_io_vec);
1209 1130
1210 _pagebuf_iodone(pb, 1); 1131 _xfs_buf_ioend(bp, 1);
1211 bio_put(bio); 1132 bio_put(bio);
1212 return 0; 1133 return 0;
1213} 1134}
1214 1135
1215STATIC void 1136STATIC void
1216_pagebuf_ioapply( 1137_xfs_buf_ioapply(
1217 xfs_buf_t *pb) 1138 xfs_buf_t *bp)
1218{ 1139{
1219 int i, rw, map_i, total_nr_pages, nr_pages; 1140 int i, rw, map_i, total_nr_pages, nr_pages;
1220 struct bio *bio; 1141 struct bio *bio;
1221 int offset = pb->pb_offset; 1142 int offset = bp->b_offset;
1222 int size = pb->pb_count_desired; 1143 int size = bp->b_count_desired;
1223 sector_t sector = pb->pb_bn; 1144 sector_t sector = bp->b_bn;
1224 unsigned int blocksize = pb->pb_target->pbr_bsize; 1145 unsigned int blocksize = bp->b_target->bt_bsize;
1225 int locking = _pagebuf_iolocked(pb); 1146 int locking = _xfs_buf_iolocked(bp);
1226 1147
1227 total_nr_pages = pb->pb_page_count; 1148 total_nr_pages = bp->b_page_count;
1228 map_i = 0; 1149 map_i = 0;
1229 1150
1230 if (pb->pb_flags & _PBF_RUN_QUEUES) { 1151 if (bp->b_flags & _XBF_RUN_QUEUES) {
1231 pb->pb_flags &= ~_PBF_RUN_QUEUES; 1152 bp->b_flags &= ~_XBF_RUN_QUEUES;
1232 rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; 1153 rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
1233 } else { 1154 } else {
1234 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; 1155 rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
1235 } 1156 }
1236 1157
1237 if (pb->pb_flags & PBF_ORDERED) { 1158 if (bp->b_flags & XBF_ORDERED) {
1238 ASSERT(!(pb->pb_flags & PBF_READ)); 1159 ASSERT(!(bp->b_flags & XBF_READ));
1239 rw = WRITE_BARRIER; 1160 rw = WRITE_BARRIER;
1240 } 1161 }
1241 1162
1242 /* Special code path for reading a sub page size pagebuf in -- 1163 /* Special code path for reading a sub page size buffer in --
1243 * we populate the whole page, and hence the other metadata 1164 * we populate the whole page, and hence the other metadata
1244 * in the same page. This optimization is only valid when the 1165 * in the same page. This optimization is only valid when the
1245 * filesystem block size and the page size are equal. 1166 * filesystem block size is not smaller than the page size.
1246 */ 1167 */
1247 if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && 1168 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1248 (pb->pb_flags & PBF_READ) && locking && 1169 (bp->b_flags & XBF_READ) && locking &&
1249 (blocksize == PAGE_CACHE_SIZE)) { 1170 (blocksize >= PAGE_CACHE_SIZE)) {
1250 bio = bio_alloc(GFP_NOIO, 1); 1171 bio = bio_alloc(GFP_NOIO, 1);
1251 1172
1252 bio->bi_bdev = pb->pb_target->pbr_bdev; 1173 bio->bi_bdev = bp->b_target->bt_bdev;
1253 bio->bi_sector = sector - (offset >> BBSHIFT); 1174 bio->bi_sector = sector - (offset >> BBSHIFT);
1254 bio->bi_end_io = bio_end_io_pagebuf; 1175 bio->bi_end_io = xfs_buf_bio_end_io;
1255 bio->bi_private = pb; 1176 bio->bi_private = bp;
1256 1177
1257 bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); 1178 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1258 size = 0; 1179 size = 0;
1259 1180
1260 atomic_inc(&pb->pb_io_remaining); 1181 atomic_inc(&bp->b_io_remaining);
1261 1182
1262 goto submit_io; 1183 goto submit_io;
1263 } 1184 }
1264 1185
1265 /* Lock down the pages we need for the request */ 1186 /* Lock down the pages we need for the request */
1266 if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { 1187 if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
1267 for (i = 0; size; i++) { 1188 for (i = 0; size; i++) {
1268 int nbytes = PAGE_CACHE_SIZE - offset; 1189 int nbytes = PAGE_CACHE_SIZE - offset;
1269 struct page *page = pb->pb_pages[i]; 1190 struct page *page = bp->b_pages[i];
1270 1191
1271 if (nbytes > size) 1192 if (nbytes > size)
1272 nbytes = size; 1193 nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
1276 size -= nbytes; 1197 size -= nbytes;
1277 offset = 0; 1198 offset = 0;
1278 } 1199 }
1279 offset = pb->pb_offset; 1200 offset = bp->b_offset;
1280 size = pb->pb_count_desired; 1201 size = bp->b_count_desired;
1281 } 1202 }
1282 1203
1283next_chunk: 1204next_chunk:
1284 atomic_inc(&pb->pb_io_remaining); 1205 atomic_inc(&bp->b_io_remaining);
1285 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); 1206 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
1286 if (nr_pages > total_nr_pages) 1207 if (nr_pages > total_nr_pages)
1287 nr_pages = total_nr_pages; 1208 nr_pages = total_nr_pages;
1288 1209
1289 bio = bio_alloc(GFP_NOIO, nr_pages); 1210 bio = bio_alloc(GFP_NOIO, nr_pages);
1290 bio->bi_bdev = pb->pb_target->pbr_bdev; 1211 bio->bi_bdev = bp->b_target->bt_bdev;
1291 bio->bi_sector = sector; 1212 bio->bi_sector = sector;
1292 bio->bi_end_io = bio_end_io_pagebuf; 1213 bio->bi_end_io = xfs_buf_bio_end_io;
1293 bio->bi_private = pb; 1214 bio->bi_private = bp;
1294 1215
1295 for (; size && nr_pages; nr_pages--, map_i++) { 1216 for (; size && nr_pages; nr_pages--, map_i++) {
1296 int nbytes = PAGE_CACHE_SIZE - offset; 1217 int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
1297 1218
1298 if (nbytes > size) 1219 if (nbytes > size)
1299 nbytes = size; 1220 nbytes = size;
1300 1221
1301 if (bio_add_page(bio, pb->pb_pages[map_i], 1222 rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
1302 nbytes, offset) < nbytes) 1223 if (rbytes < nbytes)
1303 break; 1224 break;
1304 1225
1305 offset = 0; 1226 offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
1315 goto next_chunk; 1236 goto next_chunk;
1316 } else { 1237 } else {
1317 bio_put(bio); 1238 bio_put(bio);
1318 pagebuf_ioerror(pb, EIO); 1239 xfs_buf_ioerror(bp, EIO);
1319 } 1240 }
1320} 1241}
1321 1242
1322/*
1323 * pagebuf_iorequest -- the core I/O request routine.
1324 */
1325int 1243int
1326pagebuf_iorequest( /* start real I/O */ 1244xfs_buf_iorequest(
1327 xfs_buf_t *pb) /* buffer to convey to device */ 1245 xfs_buf_t *bp)
1328{ 1246{
1329 PB_TRACE(pb, "iorequest", 0); 1247 XB_TRACE(bp, "iorequest", 0);
1330 1248
1331 if (pb->pb_flags & PBF_DELWRI) { 1249 if (bp->b_flags & XBF_DELWRI) {
1332 pagebuf_delwri_queue(pb, 1); 1250 xfs_buf_delwri_queue(bp, 1);
1333 return 0; 1251 return 0;
1334 } 1252 }
1335 1253
1336 if (pb->pb_flags & PBF_WRITE) { 1254 if (bp->b_flags & XBF_WRITE) {
1337 _pagebuf_wait_unpin(pb); 1255 xfs_buf_wait_unpin(bp);
1338 } 1256 }
1339 1257
1340 pagebuf_hold(pb); 1258 xfs_buf_hold(bp);
1341 1259
1342 /* Set the count to 1 initially, this will stop an I/O 1260 /* Set the count to 1 initially, this will stop an I/O
1343 * completion callout which happens before we have started 1261 * completion callout which happens before we have started
1344 * all the I/O from calling pagebuf_iodone too early. 1262 * all the I/O from calling xfs_buf_ioend too early.
1345 */ 1263 */
1346 atomic_set(&pb->pb_io_remaining, 1); 1264 atomic_set(&bp->b_io_remaining, 1);
1347 _pagebuf_ioapply(pb); 1265 _xfs_buf_ioapply(bp);
1348 _pagebuf_iodone(pb, 0); 1266 _xfs_buf_ioend(bp, 0);
1349 1267
1350 pagebuf_rele(pb); 1268 xfs_buf_rele(bp);
1351 return 0; 1269 return 0;
1352} 1270}
1353 1271
1354/* 1272/*
1355 * pagebuf_iowait 1273 * Waits for I/O to complete on the buffer supplied.
1356 * 1274 * It returns immediately if no I/O is pending.
1357 * pagebuf_iowait waits for I/O to complete on the buffer supplied. 1275 * It returns the I/O error code, if any, or 0 if there was no error.
1358 * It returns immediately if no I/O is pending. In any case, it returns
1359 * the error code, if any, or 0 if there is no error.
1360 */ 1276 */
1361int 1277int
1362pagebuf_iowait( 1278xfs_buf_iowait(
1363 xfs_buf_t *pb) 1279 xfs_buf_t *bp)
1364{ 1280{
1365 PB_TRACE(pb, "iowait", 0); 1281 XB_TRACE(bp, "iowait", 0);
1366 if (atomic_read(&pb->pb_io_remaining)) 1282 if (atomic_read(&bp->b_io_remaining))
1367 blk_run_address_space(pb->pb_target->pbr_mapping); 1283 blk_run_address_space(bp->b_target->bt_mapping);
1368 down(&pb->pb_iodonesema); 1284 down(&bp->b_iodonesema);
1369 PB_TRACE(pb, "iowaited", (long)pb->pb_error); 1285 XB_TRACE(bp, "iowaited", (long)bp->b_error);
1370 return pb->pb_error; 1286 return bp->b_error;
1371} 1287}
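The request/wait split lets a caller issue I/O on several buffers before sleeping on any of them; a sketch for a single buffer whose flags (e.g. XBF_READ) are assumed to be set up already:

	xfs_buf_iorequest(bp);		/* build and submit the bios */
	error = xfs_buf_iowait(bp);	/* sleep on b_iodonesema */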
1372 1288
1373caddr_t 1289xfs_caddr_t
1374pagebuf_offset( 1290xfs_buf_offset(
1375 xfs_buf_t *pb, 1291 xfs_buf_t *bp,
1376 size_t offset) 1292 size_t offset)
1377{ 1293{
1378 struct page *page; 1294 struct page *page;
1379 1295
1380 offset += pb->pb_offset; 1296 if (bp->b_flags & XBF_MAPPED)
1297 return XFS_BUF_PTR(bp) + offset;
1381 1298
1382 page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; 1299 offset += bp->b_offset;
1383 return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); 1300 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
1301 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
1384} 1302}
1385 1303
1386/* 1304/*
1387 * pagebuf_iomove
1388 *
1389 * Move data into or out of a buffer. 1305 * Move data into or out of a buffer.
1390 */ 1306 */
1391void 1307void
1392pagebuf_iomove( 1308xfs_buf_iomove(
1393 xfs_buf_t *pb, /* buffer to process */ 1309 xfs_buf_t *bp, /* buffer to process */
1394 size_t boff, /* starting buffer offset */ 1310 size_t boff, /* starting buffer offset */
1395 size_t bsize, /* length to copy */ 1311 size_t bsize, /* length to copy */
1396 caddr_t data, /* data address */ 1312 caddr_t data, /* data address */
1397 page_buf_rw_t mode) /* read/write flag */ 1313 xfs_buf_rw_t mode) /* read/write/zero flag */
1398{ 1314{
1399 size_t bend, cpoff, csize; 1315 size_t bend, cpoff, csize;
1400 struct page *page; 1316 struct page *page;
1401 1317
1402 bend = boff + bsize; 1318 bend = boff + bsize;
1403 while (boff < bend) { 1319 while (boff < bend) {
1404 page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; 1320 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1405 cpoff = page_buf_poff(boff + pb->pb_offset); 1321 cpoff = xfs_buf_poff(boff + bp->b_offset);
1406 csize = min_t(size_t, 1322 csize = min_t(size_t,
1407 PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); 1323 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
1408 1324
1409 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1325 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
1410 1326
1411 switch (mode) { 1327 switch (mode) {
1412 case PBRW_ZERO: 1328 case XBRW_ZERO:
1413 memset(page_address(page) + cpoff, 0, csize); 1329 memset(page_address(page) + cpoff, 0, csize);
1414 break; 1330 break;
1415 case PBRW_READ: 1331 case XBRW_READ:
1416 memcpy(data, page_address(page) + cpoff, csize); 1332 memcpy(data, page_address(page) + cpoff, csize);
1417 break; 1333 break;
1418 case PBRW_WRITE: 1334 case XBRW_WRITE:
1419 memcpy(page_address(page) + cpoff, data, csize); 1335 memcpy(page_address(page) + cpoff, data, csize);
1420 } 1336 }
1421 1337
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
1425} 1341}
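A sketch of zeroing a sub-range through this helper; XBRW_ZERO never dereferences the data pointer, so NULL is safe, and the offsets are hypothetical:

	/* zero 512 bytes starting 1024 bytes into the buffer */
	xfs_buf_iomove(bp, 1024, 512, NULL, XBRW_ZERO);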
1426 1342
1427/* 1343/*
1428 * Handling of buftargs. 1344 * Handling of buffer targets (buftargs).
1429 */ 1345 */
1430 1346
1431/* 1347/*
1432 * Wait for any bufs with callbacks that have been submitted but 1348 * Wait for any bufs with callbacks that have been submitted but
1433 * have not yet returned... walk the hash list for the target. 1349 * have not yet returned... walk the hash list for the target.
1434 */ 1350 */
1435void 1351void
1436xfs_wait_buftarg( 1352xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
1444 hash = &btp->bt_hash[i]; 1360 hash = &btp->bt_hash[i];
1445again: 1361again:
1446 spin_lock(&hash->bh_lock); 1362 spin_lock(&hash->bh_lock);
1447 list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { 1363 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
1448 ASSERT(btp == bp->pb_target); 1364 ASSERT(btp == bp->b_target);
1449 if (!(bp->pb_flags & PBF_FS_MANAGED)) { 1365 if (!(bp->b_flags & XBF_FS_MANAGED)) {
1450 spin_unlock(&hash->bh_lock); 1366 spin_unlock(&hash->bh_lock);
1451 /* 1367 /*
1452 * Catch superblock reference count leaks 1368 * Catch superblock reference count leaks
1453 * immediately 1369 * immediately
1454 */ 1370 */
1455 BUG_ON(bp->pb_bn == 0); 1371 BUG_ON(bp->b_bn == 0);
1456 delay(100); 1372 delay(100);
1457 goto again; 1373 goto again;
1458 } 1374 }
@@ -1462,9 +1378,9 @@ again:
1462} 1378}
1463 1379
1464/* 1380/*
1465 * Allocate buffer hash table for a given target. 1381 * Allocate buffer hash table for a given target.
1466 * For devices containing metadata (i.e. not the log/realtime devices) 1382 * For devices containing metadata (i.e. not the log/realtime devices)
1467 * we need to allocate a much larger hash table. 1383 * we need to allocate a much larger hash table.
1468 */ 1384 */
1469STATIC void 1385STATIC void
1470xfs_alloc_bufhash( 1386xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
1487xfs_free_bufhash( 1403xfs_free_bufhash(
1488 xfs_buftarg_t *btp) 1404 xfs_buftarg_t *btp)
1489{ 1405{
1490 kmem_free(btp->bt_hash, 1406 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1491 (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1492 btp->bt_hash = NULL; 1407 btp->bt_hash = NULL;
1493} 1408}
1494 1409
1410/*
1411 * buftarg list for delwrite queue processing
1412 */
1413STATIC LIST_HEAD(xfs_buftarg_list);
1414STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
1415
1416STATIC void
1417xfs_register_buftarg(
1418 xfs_buftarg_t *btp)
1419{
1420 spin_lock(&xfs_buftarg_lock);
1421 list_add(&btp->bt_list, &xfs_buftarg_list);
1422 spin_unlock(&xfs_buftarg_lock);
1423}
1424
1425STATIC void
1426xfs_unregister_buftarg(
1427 xfs_buftarg_t *btp)
1428{
1429 spin_lock(&xfs_buftarg_lock);
1430 list_del(&btp->bt_list);
1431 spin_unlock(&xfs_buftarg_lock);
1432}
1433
1495void 1434void
1496xfs_free_buftarg( 1435xfs_free_buftarg(
1497 xfs_buftarg_t *btp, 1436 xfs_buftarg_t *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
1499{ 1438{
1500 xfs_flush_buftarg(btp, 1); 1439 xfs_flush_buftarg(btp, 1);
1501 if (external) 1440 if (external)
1502 xfs_blkdev_put(btp->pbr_bdev); 1441 xfs_blkdev_put(btp->bt_bdev);
1503 xfs_free_bufhash(btp); 1442 xfs_free_bufhash(btp);
1504 iput(btp->pbr_mapping->host); 1443 iput(btp->bt_mapping->host);
1444
1445 /* Unregister the buftarg first so that we don't get a
1446 * wakeup finding a non-existent task
1447 */
1448 xfs_unregister_buftarg(btp);
1449 kthread_stop(btp->bt_task);
1450
1505 kmem_free(btp, sizeof(*btp)); 1451 kmem_free(btp, sizeof(*btp));
1506} 1452}
1507 1453
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
1512 unsigned int sectorsize, 1458 unsigned int sectorsize,
1513 int verbose) 1459 int verbose)
1514{ 1460{
1515 btp->pbr_bsize = blocksize; 1461 btp->bt_bsize = blocksize;
1516 btp->pbr_sshift = ffs(sectorsize) - 1; 1462 btp->bt_sshift = ffs(sectorsize) - 1;
1517 btp->pbr_smask = sectorsize - 1; 1463 btp->bt_smask = sectorsize - 1;
1518 1464
1519 if (set_blocksize(btp->pbr_bdev, sectorsize)) { 1465 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1520 printk(KERN_WARNING 1466 printk(KERN_WARNING
1521 "XFS: Cannot set_blocksize to %u on device %s\n", 1467 "XFS: Cannot set_blocksize to %u on device %s\n",
1522 sectorsize, XFS_BUFTARG_NAME(btp)); 1468 sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
1536} 1482}
1537 1483
1538/* 1484/*
1539* When allocating the initial buffer target we have not yet 1485 * When allocating the initial buffer target we have not yet
1540* read in the superblock, so don't know what sized sectors 1486 * read in the superblock, so don't know what sized sectors
1541* are being used at this early stage. Play safe. 1487 * are being used at this early stage. Play safe.
1542*/ 1488 */
1543STATIC int 1489STATIC int
1544xfs_setsize_buftarg_early( 1490xfs_setsize_buftarg_early(
1545 xfs_buftarg_t *btp, 1491 xfs_buftarg_t *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
1587 mapping->a_ops = &mapping_aops; 1533 mapping->a_ops = &mapping_aops;
1588 mapping->backing_dev_info = bdi; 1534 mapping->backing_dev_info = bdi;
1589 mapping_set_gfp_mask(mapping, GFP_NOFS); 1535 mapping_set_gfp_mask(mapping, GFP_NOFS);
1590 btp->pbr_mapping = mapping; 1536 btp->bt_mapping = mapping;
1591 return 0; 1537 return 0;
1592} 1538}
1593 1539
1540STATIC int
1541xfs_alloc_delwrite_queue(
1542 xfs_buftarg_t *btp)
1543{
1544 int error = 0;
1545
1546 INIT_LIST_HEAD(&btp->bt_list);
1547 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1548 spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
1549 btp->bt_flags = 0;
1550 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
1551 if (IS_ERR(btp->bt_task)) {
1552 error = PTR_ERR(btp->bt_task);
1553 goto out_error;
1554 }
1555 xfs_register_buftarg(btp);
1556out_error:
1557 return error;
1558}
1559
1594xfs_buftarg_t * 1560xfs_buftarg_t *
1595xfs_alloc_buftarg( 1561xfs_alloc_buftarg(
1596 struct block_device *bdev, 1562 struct block_device *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
1600 1566
1601 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1567 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
1602 1568
1603 btp->pbr_dev = bdev->bd_dev; 1569 btp->bt_dev = bdev->bd_dev;
1604 btp->pbr_bdev = bdev; 1570 btp->bt_bdev = bdev;
1605 if (xfs_setsize_buftarg_early(btp, bdev)) 1571 if (xfs_setsize_buftarg_early(btp, bdev))
1606 goto error; 1572 goto error;
1607 if (xfs_mapping_buftarg(btp, bdev)) 1573 if (xfs_mapping_buftarg(btp, bdev))
1608 goto error; 1574 goto error;
1575 if (xfs_alloc_delwrite_queue(btp))
1576 goto error;
1609 xfs_alloc_bufhash(btp, external); 1577 xfs_alloc_bufhash(btp, external);
1610 return btp; 1578 return btp;
1611 1579
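xfs_alloc_buftarg() now gives every target its own delwri daemon via xfs_alloc_delwrite_queue() above (note that the success path also falls through the out_error label, with error still 0). The setup has to mirror the teardown in xfs_free_buftarg() earlier in this patch: a target must leave the global list before its thread is stopped, or xfsbufd_wakeup() could wake a task that is going away. A lifecycle sketch, with hypothetical helper names wrapping the calls from the patch:

    static int delwri_daemon_start(xfs_buftarg_t *btp)
    {
            btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
            if (IS_ERR(btp->bt_task))
                    return PTR_ERR(btp->bt_task);
            xfs_register_buftarg(btp);      /* now visible to wakeups */
            return 0;
    }

    static void delwri_daemon_stop(xfs_buftarg_t *btp)
    {
            xfs_unregister_buftarg(btp);    /* no further wakeups can find us */
            kthread_stop(btp->bt_task);     /* then reap the thread safely */
    }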
@@ -1616,83 +1584,81 @@ error:
1616 1584
1617 1585
1618/* 1586/*
1619 * Pagebuf delayed write buffer handling 1587 * Delayed write buffer handling
1620 */ 1588 */
1621
1622STATIC LIST_HEAD(pbd_delwrite_queue);
1623STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1624
1625STATIC void 1589STATIC void
1626pagebuf_delwri_queue( 1590xfs_buf_delwri_queue(
1627 xfs_buf_t *pb, 1591 xfs_buf_t *bp,
1628 int unlock) 1592 int unlock)
1629{ 1593{
1630 PB_TRACE(pb, "delwri_q", (long)unlock); 1594 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1631 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == 1595 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1632 (PBF_DELWRI|PBF_ASYNC)); 1596
1597 XB_TRACE(bp, "delwri_q", (long)unlock);
1598 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1633 1599
1634 spin_lock(&pbd_delwrite_lock); 1600 spin_lock(dwlk);
1635 /* If already in the queue, dequeue and place at tail */ 1601 /* If already in the queue, dequeue and place at tail */
1636 if (!list_empty(&pb->pb_list)) { 1602 if (!list_empty(&bp->b_list)) {
1637 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1603 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1638 if (unlock) { 1604 if (unlock)
1639 atomic_dec(&pb->pb_hold); 1605 atomic_dec(&bp->b_hold);
1640 } 1606 list_del(&bp->b_list);
1641 list_del(&pb->pb_list);
1642 } 1607 }
1643 1608
1644 pb->pb_flags |= _PBF_DELWRI_Q; 1609 bp->b_flags |= _XBF_DELWRI_Q;
1645 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1610 list_add_tail(&bp->b_list, dwq);
1646 pb->pb_queuetime = jiffies; 1611 bp->b_queuetime = jiffies;
1647 spin_unlock(&pbd_delwrite_lock); 1612 spin_unlock(dwlk);
1648 1613
1649 if (unlock) 1614 if (unlock)
1650 pagebuf_unlock(pb); 1615 xfs_buf_unlock(bp);
1651} 1616}
1652 1617
1653void 1618void
1654pagebuf_delwri_dequeue( 1619xfs_buf_delwri_dequeue(
1655 xfs_buf_t *pb) 1620 xfs_buf_t *bp)
1656{ 1621{
1622 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1657 int dequeued = 0; 1623 int dequeued = 0;
1658 1624
1659 spin_lock(&pbd_delwrite_lock); 1625 spin_lock(dwlk);
1660 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1626 if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
1661 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1627 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1662 list_del_init(&pb->pb_list); 1628 list_del_init(&bp->b_list);
1663 dequeued = 1; 1629 dequeued = 1;
1664 } 1630 }
1665 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1631 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1666 spin_unlock(&pbd_delwrite_lock); 1632 spin_unlock(dwlk);
1667 1633
1668 if (dequeued) 1634 if (dequeued)
1669 pagebuf_rele(pb); 1635 xfs_buf_rele(bp);
1670 1636
1671 PB_TRACE(pb, "delwri_dq", (long)dequeued); 1637 XB_TRACE(bp, "delwri_dq", (long)dequeued);
1672} 1638}
1673 1639
1674STATIC void 1640STATIC void
1675pagebuf_runall_queues( 1641xfs_buf_runall_queues(
1676 struct workqueue_struct *queue) 1642 struct workqueue_struct *queue)
1677{ 1643{
1678 flush_workqueue(queue); 1644 flush_workqueue(queue);
1679} 1645}
1680 1646
1681/* Defines for pagebuf daemon */
1682STATIC struct task_struct *xfsbufd_task;
1683STATIC int xfsbufd_force_flush;
1684STATIC int xfsbufd_force_sleep;
1685
1686STATIC int 1647STATIC int
1687xfsbufd_wakeup( 1648xfsbufd_wakeup(
1688 int priority, 1649 int priority,
1689 gfp_t mask) 1650 gfp_t mask)
1690{ 1651{
1691 if (xfsbufd_force_sleep) 1652 xfs_buftarg_t *btp;
1692 return 0; 1653
1693 xfsbufd_force_flush = 1; 1654 spin_lock(&xfs_buftarg_lock);
1694 barrier(); 1655 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1695 wake_up_process(xfsbufd_task); 1656 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1657 continue;
1658 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1659 wake_up_process(btp->bt_task);
1660 }
1661 spin_unlock(&xfs_buftarg_lock);
1696 return 0; 1662 return 0;
1697} 1663}
1698 1664
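Two details in the queue/dequeue pair above are easy to miss. First, a buffer already on the queue is moved to the tail with a fresh b_queuetime, so the list stays ordered oldest-first. Second, the queue itself owns exactly one hold on the buffer, which is why a re-queue with `unlock` set drops the caller's extra hold instead of stacking it. The wakeup routine, now registered as the memory shaker (see xfs_buf_init below), then kicks every registered target that is not frozen. Sketch of the reference accounting, as a standalone fragment:

    /* re-queue: the queue already holds one reference, keep it at one */
    if (!list_empty(&bp->b_list)) {
            if (unlock)
                    atomic_dec(&bp->b_hold);  /* drop the caller's extra hold */
            list_del(&bp->b_list);
    }
    list_add_tail(&bp->b_list, dwq);          /* tail => oldest-first ordering */
    bp->b_queuetime = jiffies;                /* restart this buffer's age */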
@@ -1702,67 +1668,70 @@ xfsbufd(
1702{ 1668{
1703 struct list_head tmp; 1669 struct list_head tmp;
1704 unsigned long age; 1670 unsigned long age;
1705 xfs_buftarg_t *target; 1671 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1706 xfs_buf_t *pb, *n; 1672 xfs_buf_t *bp, *n;
1673 struct list_head *dwq = &target->bt_delwrite_queue;
1674 spinlock_t *dwlk = &target->bt_delwrite_lock;
1707 1675
1708 current->flags |= PF_MEMALLOC; 1676 current->flags |= PF_MEMALLOC;
1709 1677
1710 INIT_LIST_HEAD(&tmp); 1678 INIT_LIST_HEAD(&tmp);
1711 do { 1679 do {
1712 if (unlikely(freezing(current))) { 1680 if (unlikely(freezing(current))) {
1713 xfsbufd_force_sleep = 1; 1681 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1714 refrigerator(); 1682 refrigerator();
1715 } else { 1683 } else {
1716 xfsbufd_force_sleep = 0; 1684 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1717 } 1685 }
1718 1686
1719 schedule_timeout_interruptible( 1687 schedule_timeout_interruptible(
1720 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1688 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1721 1689
1722 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1690 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1723 spin_lock(&pbd_delwrite_lock); 1691 spin_lock(dwlk);
1724 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1692 list_for_each_entry_safe(bp, n, dwq, b_list) {
1725 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); 1693 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
1726 ASSERT(pb->pb_flags & PBF_DELWRI); 1694 ASSERT(bp->b_flags & XBF_DELWRI);
1727 1695
1728 if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { 1696 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
1729 if (!xfsbufd_force_flush && 1697 if (!test_bit(XBT_FORCE_FLUSH,
1698 &target->bt_flags) &&
1730 time_before(jiffies, 1699 time_before(jiffies,
1731 pb->pb_queuetime + age)) { 1700 bp->b_queuetime + age)) {
1732 pagebuf_unlock(pb); 1701 xfs_buf_unlock(bp);
1733 break; 1702 break;
1734 } 1703 }
1735 1704
1736 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1705 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1737 pb->pb_flags |= PBF_WRITE; 1706 bp->b_flags |= XBF_WRITE;
1738 list_move(&pb->pb_list, &tmp); 1707 list_move(&bp->b_list, &tmp);
1739 } 1708 }
1740 } 1709 }
1741 spin_unlock(&pbd_delwrite_lock); 1710 spin_unlock(dwlk);
1742 1711
1743 while (!list_empty(&tmp)) { 1712 while (!list_empty(&tmp)) {
1744 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1713 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1745 target = pb->pb_target; 1714 ASSERT(target == bp->b_target);
1746 1715
1747 list_del_init(&pb->pb_list); 1716 list_del_init(&bp->b_list);
1748 pagebuf_iostrategy(pb); 1717 xfs_buf_iostrategy(bp);
1749 1718
1750 blk_run_address_space(target->pbr_mapping); 1719 blk_run_address_space(target->bt_mapping);
1751 } 1720 }
1752 1721
1753 if (as_list_len > 0) 1722 if (as_list_len > 0)
1754 purge_addresses(); 1723 purge_addresses();
1755 1724
1756 xfsbufd_force_flush = 0; 1725 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1757 } while (!kthread_should_stop()); 1726 } while (!kthread_should_stop());
1758 1727
1759 return 0; 1728 return 0;
1760} 1729}
1761 1730
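Because the queue is oldest-first (see above), the scan in xfsbufd() can stop at the first buffer that is still too young unless XBT_FORCE_FLUSH is set; expiry is a plain jiffies comparison. A hedged helper making the age test explicit, equivalent to the inline check in the loop:

    /* hypothetical helper; time_before() copes with jiffies wraparound */
    static int delwri_expired(xfs_buf_t *bp, unsigned long age)
    {
            return !time_before(jiffies, bp->b_queuetime + age);
    }

    /* 'age' as computed in xfsbufd() above: */
    unsigned long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);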
1762/* 1731/*
1763 * Go through all incore buffers, and release buffers if they belong to 1732 * Go through all incore buffers, and release buffers if they belong to
1764 * the given device. This is used in filesystem error handling to 1733 * the given device. This is used in filesystem error handling to
1765 * preserve the consistency of its metadata. 1734 * preserve the consistency of its metadata.
1766 */ 1735 */
1767int 1736int
1768xfs_flush_buftarg( 1737xfs_flush_buftarg(
@@ -1770,73 +1739,72 @@ xfs_flush_buftarg(
1770 int wait) 1739 int wait)
1771{ 1740{
1772 struct list_head tmp; 1741 struct list_head tmp;
1773 xfs_buf_t *pb, *n; 1742 xfs_buf_t *bp, *n;
1774 int pincount = 0; 1743 int pincount = 0;
1744 struct list_head *dwq = &target->bt_delwrite_queue;
1745 spinlock_t *dwlk = &target->bt_delwrite_lock;
1775 1746
1776 pagebuf_runall_queues(xfsdatad_workqueue); 1747 xfs_buf_runall_queues(xfsdatad_workqueue);
1777 pagebuf_runall_queues(xfslogd_workqueue); 1748 xfs_buf_runall_queues(xfslogd_workqueue);
1778 1749
1779 INIT_LIST_HEAD(&tmp); 1750 INIT_LIST_HEAD(&tmp);
1780 spin_lock(&pbd_delwrite_lock); 1751 spin_lock(dwlk);
1781 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1752 list_for_each_entry_safe(bp, n, dwq, b_list) {
1782 1753 ASSERT(bp->b_target == target);
1783 if (pb->pb_target != target) 1754 ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
1784 continue; 1755 XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
1785 1756 if (xfs_buf_ispin(bp)) {
1786 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1787 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1788 if (pagebuf_ispin(pb)) {
1789 pincount++; 1757 pincount++;
1790 continue; 1758 continue;
1791 } 1759 }
1792 1760
1793 list_move(&pb->pb_list, &tmp); 1761 list_move(&bp->b_list, &tmp);
1794 } 1762 }
1795 spin_unlock(&pbd_delwrite_lock); 1763 spin_unlock(dwlk);
1796 1764
1797 /* 1765 /*
1798 * Dropped the delayed write list lock, now walk the temporary list 1766 * Dropped the delayed write list lock, now walk the temporary list
1799 */ 1767 */
1800 list_for_each_entry_safe(pb, n, &tmp, pb_list) { 1768 list_for_each_entry_safe(bp, n, &tmp, b_list) {
1801 pagebuf_lock(pb); 1769 xfs_buf_lock(bp);
1802 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1770 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1803 pb->pb_flags |= PBF_WRITE; 1771 bp->b_flags |= XBF_WRITE;
1804 if (wait) 1772 if (wait)
1805 pb->pb_flags &= ~PBF_ASYNC; 1773 bp->b_flags &= ~XBF_ASYNC;
1806 else 1774 else
1807 list_del_init(&pb->pb_list); 1775 list_del_init(&bp->b_list);
1808 1776
1809 pagebuf_iostrategy(pb); 1777 xfs_buf_iostrategy(bp);
1810 } 1778 }
1811 1779
1812 /* 1780 /*
1813 * Remaining list items must be flushed before returning 1781 * Remaining list items must be flushed before returning
1814 */ 1782 */
1815 while (!list_empty(&tmp)) { 1783 while (!list_empty(&tmp)) {
1816 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1784 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1817 1785
1818 list_del_init(&pb->pb_list); 1786 list_del_init(&bp->b_list);
1819 xfs_iowait(pb); 1787 xfs_iowait(bp);
1820 xfs_buf_relse(pb); 1788 xfs_buf_relse(bp);
1821 } 1789 }
1822 1790
1823 if (wait) 1791 if (wait)
1824 blk_run_address_space(target->pbr_mapping); 1792 blk_run_address_space(target->bt_mapping);
1825 1793
1826 return pincount; 1794 return pincount;
1827} 1795}
1828 1796
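xfs_flush_buftarg() skips pinned buffers and reports how many it skipped, so a caller that needs the target fully drained loops until the count reaches zero, giving log I/O completion time to unpin. A hypothetical caller, assuming the log has already been forced:

    while (xfs_flush_buftarg(btp, 1) > 0)   /* 1 => wait for submitted I/O */
            delay(100);                     /* let unpins from log completion land */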
1829int __init 1797int __init
1830pagebuf_init(void) 1798xfs_buf_init(void)
1831{ 1799{
1832 int error = -ENOMEM; 1800 int error = -ENOMEM;
1833 1801
1834#ifdef PAGEBUF_TRACE 1802#ifdef XFS_BUF_TRACE
1835 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); 1803 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
1836#endif 1804#endif
1837 1805
1838 pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); 1806 xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
1839 if (!pagebuf_zone) 1807 if (!xfs_buf_zone)
1840 goto out_free_trace_buf; 1808 goto out_free_trace_buf;
1841 1809
1842 xfslogd_workqueue = create_workqueue("xfslogd"); 1810 xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
1847 if (!xfsdatad_workqueue) 1815 if (!xfsdatad_workqueue)
1848 goto out_destroy_xfslogd_workqueue; 1816 goto out_destroy_xfslogd_workqueue;
1849 1817
1850 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); 1818 xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
1851 if (IS_ERR(xfsbufd_task)) { 1819 if (!xfs_buf_shake)
1852 error = PTR_ERR(xfsbufd_task);
1853 goto out_destroy_xfsdatad_workqueue; 1820 goto out_destroy_xfsdatad_workqueue;
1854 }
1855
1856 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1857 if (!pagebuf_shake)
1858 goto out_stop_xfsbufd;
1859 1821
1860 return 0; 1822 return 0;
1861 1823
1862 out_stop_xfsbufd:
1863 kthread_stop(xfsbufd_task);
1864 out_destroy_xfsdatad_workqueue: 1824 out_destroy_xfsdatad_workqueue:
1865 destroy_workqueue(xfsdatad_workqueue); 1825 destroy_workqueue(xfsdatad_workqueue);
1866 out_destroy_xfslogd_workqueue: 1826 out_destroy_xfslogd_workqueue:
1867 destroy_workqueue(xfslogd_workqueue); 1827 destroy_workqueue(xfslogd_workqueue);
1868 out_free_buf_zone: 1828 out_free_buf_zone:
1869 kmem_zone_destroy(pagebuf_zone); 1829 kmem_zone_destroy(xfs_buf_zone);
1870 out_free_trace_buf: 1830 out_free_trace_buf:
1871#ifdef PAGEBUF_TRACE 1831#ifdef XFS_BUF_TRACE
1872 ktrace_free(pagebuf_trace_buf); 1832 ktrace_free(xfs_buf_trace_buf);
1873#endif 1833#endif
1874 return error; 1834 return error;
1875} 1835}
1876 1836
1877void 1837void
1878pagebuf_terminate(void) 1838xfs_buf_terminate(void)
1879{ 1839{
1880 kmem_shake_deregister(pagebuf_shake); 1840 kmem_shake_deregister(xfs_buf_shake);
1881 kthread_stop(xfsbufd_task);
1882 destroy_workqueue(xfsdatad_workqueue); 1841 destroy_workqueue(xfsdatad_workqueue);
1883 destroy_workqueue(xfslogd_workqueue); 1842 destroy_workqueue(xfslogd_workqueue);
1884 kmem_zone_destroy(pagebuf_zone); 1843 kmem_zone_destroy(xfs_buf_zone);
1885#ifdef PAGEBUF_TRACE 1844#ifdef XFS_BUF_TRACE
1886 ktrace_free(pagebuf_trace_buf); 1845 ktrace_free(xfs_buf_trace_buf);
1887#endif 1846#endif
1888} 1847}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d1..4dd6592d5a4c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -32,44 +32,47 @@
32 * Base types 32 * Base types
33 */ 33 */
34 34
35#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) 35#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
36 36
37#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) 37#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
38#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) 38#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
39#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) 39#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
40#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) 40#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
41 41
42typedef enum page_buf_rw_e { 42typedef enum {
43 PBRW_READ = 1, /* transfer into target memory */ 43 XBRW_READ = 1, /* transfer into target memory */
44 PBRW_WRITE = 2, /* transfer from target memory */ 44 XBRW_WRITE = 2, /* transfer from target memory */
45 PBRW_ZERO = 3 /* Zero target memory */ 45 XBRW_ZERO = 3, /* Zero target memory */
46} page_buf_rw_t; 46} xfs_buf_rw_t;
47 47
48 48typedef enum {
49typedef enum page_buf_flags_e { /* pb_flags values */ 49 XBF_READ = (1 << 0), /* buffer intended for reading from device */
50 PBF_READ = (1 << 0), /* buffer intended for reading from device */ 50 XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
51 PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ 51 XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
52 PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ 52 XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
53 PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ 53 XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
54 PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ 54 XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
55 PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 55 XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
56 PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 56 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
57 PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 57 XBF_ORDERED = (1 << 11), /* use ordered writes */
58 PBF_ORDERED = (1 << 11), /* use ordered writes */ 58 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
59 PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
60 59
61 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
62 PBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
63 PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ 62 XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
64 PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ 63 XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
65 64
66 /* flags used only internally */ 65 /* flags used only internally */
67 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 66 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
68 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 67 _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
69 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 68 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
70 _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 69 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
71} page_buf_flags_t; 70} xfs_buf_flags_t;
72 71
72typedef enum {
73 XBT_FORCE_SLEEP = (0 << 1),
74 XBT_FORCE_FLUSH = (1 << 1),
75} xfs_buftarg_flags_t;
73 76
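A subtlety in the new xfs_buftarg_flags_t: these values are consumed by set_bit()/test_bit()/clear_bit() (see xfsbufd_wakeup() and xfsbufd() in xfs_buf.c), which take a bit *number*, not a mask, so XBT_FORCE_SLEEP = (0 << 1) names bit 0 and XBT_FORCE_FLUSH = (1 << 1) names bit 2. A runnable userspace check of the arithmetic:

    #include <assert.h>

    enum { XBT_FORCE_SLEEP = (0 << 1), XBT_FORCE_FLUSH = (1 << 1) };

    int main(void)
    {
            unsigned long flags = 0;

            flags |= 1UL << XBT_FORCE_FLUSH;        /* what set_bit() does */
            assert(XBT_FORCE_SLEEP == 0 && XBT_FORCE_FLUSH == 2);
            assert(flags & (1UL << XBT_FORCE_FLUSH));
            assert(!(flags & (1UL << XBT_FORCE_SLEEP)));
            return 0;
    }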
74typedef struct xfs_bufhash { 77typedef struct xfs_bufhash {
75 struct list_head bh_list; 78 struct list_head bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
77} xfs_bufhash_t; 80} xfs_bufhash_t;
78 81
79typedef struct xfs_buftarg { 82typedef struct xfs_buftarg {
80 dev_t pbr_dev; 83 dev_t bt_dev;
81 struct block_device *pbr_bdev; 84 struct block_device *bt_bdev;
82 struct address_space *pbr_mapping; 85 struct address_space *bt_mapping;
83 unsigned int pbr_bsize; 86 unsigned int bt_bsize;
84 unsigned int pbr_sshift; 87 unsigned int bt_sshift;
85 size_t pbr_smask; 88 size_t bt_smask;
86 89
87 /* per-device buffer hash table */ 90 /* per device buffer hash table */
88 uint bt_hashmask; 91 uint bt_hashmask;
89 uint bt_hashshift; 92 uint bt_hashshift;
90 xfs_bufhash_t *bt_hash; 93 xfs_bufhash_t *bt_hash;
94
95 /* per device delwri queue */
96 struct task_struct *bt_task;
97 struct list_head bt_list;
98 struct list_head bt_delwrite_queue;
99 spinlock_t bt_delwrite_lock;
100 unsigned long bt_flags;
91} xfs_buftarg_t; 101} xfs_buftarg_t;
92 102
93/* 103/*
94 * xfs_buf_t: Buffer structure for page cache-based buffers 104 * xfs_buf_t: Buffer structure for pagecache-based buffers
105 *
106 * This buffer structure is used by the pagecache buffer management routines
107 * to refer to an assembly of pages forming a logical buffer.
95 * 108 *
96 * This buffer structure is used by the page cache buffer management routines 109 * The buffer structure is used on a temporary basis only, and discarded when
97 * to refer to an assembly of pages forming a logical buffer. The actual I/O 110 * released. The real data storage is recorded in the pagecache. Buffers are
98 * is performed with buffer_head structures, as required by drivers.
99 *
100 * The buffer structure is used on temporary basis only, and discarded when
101 * released. The real data storage is recorded in the page cache. Metadata is
102 * hashed to the block device on which the file system resides. 111 * hashed to the block device on which the file system resides.
103 */ 112 */
104 113
105struct xfs_buf; 114struct xfs_buf;
115typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
116typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
117typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
106 118
107/* call-back function on I/O completion */ 119#define XB_PAGES 2
108typedef void (*page_buf_iodone_t)(struct xfs_buf *);
109/* call-back function on I/O completion */
110typedef void (*page_buf_relse_t)(struct xfs_buf *);
111/* pre-write function */
112typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
113
114#define PB_PAGES 2
115 120
116typedef struct xfs_buf { 121typedef struct xfs_buf {
117 struct semaphore pb_sema; /* semaphore for lockables */ 122 struct semaphore b_sema; /* semaphore for lockables */
118 unsigned long pb_queuetime; /* time buffer was queued */ 123 unsigned long b_queuetime; /* time buffer was queued */
119 atomic_t pb_pin_count; /* pin count */ 124 atomic_t b_pin_count; /* pin count */
120 wait_queue_head_t pb_waiters; /* unpin waiters */ 125 wait_queue_head_t b_waiters; /* unpin waiters */
121 struct list_head pb_list; 126 struct list_head b_list;
122 page_buf_flags_t pb_flags; /* status flags */ 127 xfs_buf_flags_t b_flags; /* status flags */
123 struct list_head pb_hash_list; /* hash table list */ 128 struct list_head b_hash_list; /* hash table list */
124 xfs_bufhash_t *pb_hash; /* hash table list start */ 129 xfs_bufhash_t *b_hash; /* hash table list start */
125 xfs_buftarg_t *pb_target; /* buffer target (device) */ 130 xfs_buftarg_t *b_target; /* buffer target (device) */
126 atomic_t pb_hold; /* reference count */ 131 atomic_t b_hold; /* reference count */
127 xfs_daddr_t pb_bn; /* block number for I/O */ 132 xfs_daddr_t b_bn; /* block number for I/O */
128 loff_t pb_file_offset; /* offset in file */ 133 xfs_off_t b_file_offset; /* offset in file */
129 size_t pb_buffer_length; /* size of buffer in bytes */ 134 size_t b_buffer_length;/* size of buffer in bytes */
130 size_t pb_count_desired; /* desired transfer size */ 135 size_t b_count_desired;/* desired transfer size */
131 void *pb_addr; /* virtual address of buffer */ 136 void *b_addr; /* virtual address of buffer */
132 struct work_struct pb_iodone_work; 137 struct work_struct b_iodone_work;
133 atomic_t pb_io_remaining;/* #outstanding I/O requests */ 138 atomic_t b_io_remaining; /* #outstanding I/O requests */
134 page_buf_iodone_t pb_iodone; /* I/O completion function */ 139 xfs_buf_iodone_t b_iodone; /* I/O completion function */
135 page_buf_relse_t pb_relse; /* releasing function */ 140 xfs_buf_relse_t b_relse; /* releasing function */
136 page_buf_bdstrat_t pb_strat; /* pre-write function */ 141 xfs_buf_bdstrat_t b_strat; /* pre-write function */
137 struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ 142 struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
138 void *pb_fspriv; 143 void *b_fspriv;
139 void *pb_fspriv2; 144 void *b_fspriv2;
140 void *pb_fspriv3; 145 void *b_fspriv3;
141 unsigned short pb_error; /* error code on I/O */ 146 unsigned short b_error; /* error code on I/O */
142 unsigned short pb_locked; /* page array is locked */ 147 unsigned short b_locked; /* page array is locked */
143 unsigned int pb_page_count; /* size of page array */ 148 unsigned int b_page_count; /* size of page array */
144 unsigned int pb_offset; /* page offset in first page */ 149 unsigned int b_offset; /* page offset in first page */
145 struct page **pb_pages; /* array of page pointers */ 150 struct page **b_pages; /* array of page pointers */
146 struct page *pb_page_array[PB_PAGES]; /* inline pages */ 151 struct page *b_page_array[XB_PAGES]; /* inline pages */
147#ifdef PAGEBUF_LOCK_TRACKING 152#ifdef XFS_BUF_LOCK_TRACKING
148 int pb_last_holder; 153 int b_last_holder;
149#endif 154#endif
150} xfs_buf_t; 155} xfs_buf_t;
151 156
152 157
153/* Finding and Reading Buffers */ 158/* Finding and Reading Buffers */
154 159extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
155extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */ 160 xfs_buf_flags_t, xfs_buf_t *);
156 /* the block is in memory */
157 xfs_buftarg_t *, /* inode for block */
158 loff_t, /* starting offset of range */
159 size_t, /* length of range */
160 page_buf_flags_t, /* PBF_LOCK */
161 xfs_buf_t *); /* newly allocated buffer */
162
163#define xfs_incore(buftarg,blkno,len,lockit) \ 161#define xfs_incore(buftarg,blkno,len,lockit) \
164 _pagebuf_find(buftarg, blkno ,len, lockit, NULL) 162 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
165
166extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
167 xfs_buftarg_t *, /* inode for buffer */
168 loff_t, /* starting offset of range */
169 size_t, /* length of range */
170 page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
171 /* PBF_ASYNC */
172 163
164extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
165 xfs_buf_flags_t);
173#define xfs_buf_get(target, blkno, len, flags) \ 166#define xfs_buf_get(target, blkno, len, flags) \
174 xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 167 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
175
176extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
177 xfs_buftarg_t *, /* inode for buffer */
178 loff_t, /* starting offset of range */
179 size_t, /* length of range */
180 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
181 168
169extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
170 xfs_buf_flags_t);
182#define xfs_buf_read(target, blkno, len, flags) \ 171#define xfs_buf_read(target, blkno, len, flags) \
183 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 172 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
184
185extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
186 /* no memory or disk address */
187 size_t len,
188 xfs_buftarg_t *); /* mount point "fake" inode */
189
190extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
191 /* without disk address */
192 size_t len,
193 xfs_buftarg_t *); /* mount point "fake" inode */
194
195extern int pagebuf_associate_memory(
196 xfs_buf_t *,
197 void *,
198 size_t);
199
200extern void pagebuf_hold( /* increment reference count */
201 xfs_buf_t *); /* buffer to hold */
202 173
203extern void pagebuf_readahead( /* read ahead into cache */ 174extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
204 xfs_buftarg_t *, /* target for buffer (or NULL) */ 175extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
205 loff_t, /* starting offset of range */ 176extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
206 size_t, /* length of range */ 177extern void xfs_buf_hold(xfs_buf_t *);
207 page_buf_flags_t); /* additional read flags */ 178extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
179 xfs_buf_flags_t);
208 180
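The three lookup levels declared above differ only in how far they go: xfs_incore() is a pure cache lookup, xfs_buf_get() allocates and locks but does no I/O, and xfs_buf_read() additionally issues the read. A sketch, where target, blkno and len (in 512-byte basic blocks) are assumptions:

    bp = xfs_incore(target, blkno, len, XBF_TRYLOCK);   /* cache lookup only */
    bp = xfs_buf_get(target, blkno, len, 0);            /* allocate+lock, no I/O */
    bp = xfs_buf_read(target, blkno, len, 0);           /* allocate+lock+read */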
209/* Releasing Buffers */ 181/* Releasing Buffers */
210 182extern void xfs_buf_free(xfs_buf_t *);
211extern void pagebuf_free( /* deallocate a buffer */ 183extern void xfs_buf_rele(xfs_buf_t *);
212 xfs_buf_t *); /* buffer to deallocate */
213
214extern void pagebuf_rele( /* release hold on a buffer */
215 xfs_buf_t *); /* buffer to release */
216 184
217/* Locking and Unlocking Buffers */ 185/* Locking and Unlocking Buffers */
218 186extern int xfs_buf_cond_lock(xfs_buf_t *);
219extern int pagebuf_cond_lock( /* lock buffer, if not locked */ 187extern int xfs_buf_lock_value(xfs_buf_t *);
220 /* (returns -EBUSY if locked) */ 188extern void xfs_buf_lock(xfs_buf_t *);
221 xfs_buf_t *); /* buffer to lock */ 189extern void xfs_buf_unlock(xfs_buf_t *);
222
223extern int pagebuf_lock_value( /* return count on lock */
224 xfs_buf_t *); /* buffer to check */
225
226extern int pagebuf_lock( /* lock buffer */
227 xfs_buf_t *); /* buffer to lock */
228
229extern void pagebuf_unlock( /* unlock buffer */
230 xfs_buf_t *); /* buffer to unlock */
231 190
232/* Buffer Read and Write Routines */ 191/* Buffer Read and Write Routines */
233 192extern void xfs_buf_ioend(xfs_buf_t *, int);
234extern void pagebuf_iodone( /* mark buffer I/O complete */ 193extern void xfs_buf_ioerror(xfs_buf_t *, int);
235 xfs_buf_t *, /* buffer to mark */ 194extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
236 int); /* run completion locally, or in 195extern int xfs_buf_iorequest(xfs_buf_t *);
237 * a helper thread. */ 196extern int xfs_buf_iowait(xfs_buf_t *);
238 197extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
239extern void pagebuf_ioerror( /* mark buffer in error (or not) */ 198 xfs_buf_rw_t);
240 xfs_buf_t *, /* buffer to mark */ 199
241 int); /* error to store (0 if none) */ 200static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
242
243extern int pagebuf_iostart( /* start I/O on a buffer */
244 xfs_buf_t *, /* buffer to start */
245 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
246 /* PBF_READ, PBF_WRITE, */
247 /* PBF_DELWRI */
248
249extern int pagebuf_iorequest( /* start real I/O */
250 xfs_buf_t *); /* buffer to convey to device */
251
252extern int pagebuf_iowait( /* wait for buffer I/O done */
253 xfs_buf_t *); /* buffer to wait on */
254
255extern void pagebuf_iomove( /* move data in/out of pagebuf */
256 xfs_buf_t *, /* buffer to manipulate */
257 size_t, /* starting buffer offset */
258 size_t, /* length in buffer */
259 caddr_t, /* data pointer */
260 page_buf_rw_t); /* direction */
261
262static inline int pagebuf_iostrategy(xfs_buf_t *pb)
263{ 201{
264 return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); 202 return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
265} 203}
266 204
267static inline int pagebuf_geterror(xfs_buf_t *pb) 205static inline int xfs_buf_geterror(xfs_buf_t *bp)
268{ 206{
269 return pb ? pb->pb_error : ENOMEM; 207 return bp ? bp->b_error : ENOMEM;
270} 208}
271 209
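xfs_buf_geterror() above deliberately maps a NULL buffer to ENOMEM (positive, per XFS's internal error convention), letting callers fold allocation failure and I/O error into a single test. Illustrative caller, names assumed:

    bp = xfs_buf_read(target, blkno, len, 0);
    error = xfs_buf_geterror(bp);           /* ENOMEM if bp is NULL */
    if (error)                              /* a real error path would also */
            goto out;                       /* xfs_buf_relse(bp) if bp != NULL */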
272/* Buffer Utility Routines */ 210/* Buffer Utility Routines */
273 211extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
274extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
275 xfs_buf_t *, /* buffer to offset into */
276 size_t); /* offset */
277 212
278/* Pinning Buffer Storage in Memory */ 213/* Pinning Buffer Storage in Memory */
279 214extern void xfs_buf_pin(xfs_buf_t *);
280extern void pagebuf_pin( /* pin buffer in memory */ 215extern void xfs_buf_unpin(xfs_buf_t *);
281 xfs_buf_t *); /* buffer to pin */ 216extern int xfs_buf_ispin(xfs_buf_t *);
282
283extern void pagebuf_unpin( /* unpin buffered data */
284 xfs_buf_t *); /* buffer to unpin */
285
286extern int pagebuf_ispin( /* check if buffer is pinned */
287 xfs_buf_t *); /* buffer to check */
288 217
289/* Delayed Write Buffer Routines */ 218/* Delayed Write Buffer Routines */
290 219extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
291extern void pagebuf_delwri_dequeue(xfs_buf_t *);
292 220
293/* Buffer Daemon Setup Routines */ 221/* Buffer Daemon Setup Routines */
222extern int xfs_buf_init(void);
223extern void xfs_buf_terminate(void);
294 224
295extern int pagebuf_init(void); 225#ifdef XFS_BUF_TRACE
296extern void pagebuf_terminate(void); 226extern ktrace_t *xfs_buf_trace_buf;
297 227extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
298
299#ifdef PAGEBUF_TRACE
300extern ktrace_t *pagebuf_trace_buf;
301extern void pagebuf_trace(
302 xfs_buf_t *, /* buffer being traced */
303 char *, /* description of operation */
304 void *, /* arbitrary diagnostic value */
305 void *); /* return address */
306#else 228#else
307# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) 229#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
308#endif 230#endif
309 231
310#define pagebuf_target_name(target) \ 232#define xfs_buf_target_name(target) \
311 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) 233 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
312 234
313 235
236#define XFS_B_ASYNC XBF_ASYNC
237#define XFS_B_DELWRI XBF_DELWRI
238#define XFS_B_READ XBF_READ
239#define XFS_B_WRITE XBF_WRITE
240#define XFS_B_STALE XBF_STALE
314 241
315/* These are just for xfs_syncsub... it sets an internal variable 242#define XFS_BUF_TRYLOCK XBF_TRYLOCK
316 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t 243#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
317 */ 244#define XFS_BUF_LOCK XBF_LOCK
318#define XFS_B_ASYNC PBF_ASYNC 245#define XFS_BUF_MAPPED XBF_MAPPED
319#define XFS_B_DELWRI PBF_DELWRI
320#define XFS_B_READ PBF_READ
321#define XFS_B_WRITE PBF_WRITE
322#define XFS_B_STALE PBF_STALE
323
324#define XFS_BUF_TRYLOCK PBF_TRYLOCK
325#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
326#define XFS_BUF_LOCK PBF_LOCK
327#define XFS_BUF_MAPPED PBF_MAPPED
328
329#define BUF_BUSY PBF_DONT_BLOCK
330
331#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
332#define XFS_BUF_ZEROFLAGS(x) \
333 ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
334
335#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)
336#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)
337#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)
338#define XFS_BUF_SUPER_STALE(x) do { \
339 XFS_BUF_STALE(x); \
340 pagebuf_delwri_dequeue(x); \
341 XFS_BUF_DONE(x); \
342 } while (0)
343 246
344#define XFS_BUF_MANAGE PBF_FS_MANAGED 247#define BUF_BUSY XBF_DONT_BLOCK
345#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) 248
346 249#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
347#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) 250#define XFS_BUF_ZEROFLAGS(bp) \
348#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x) 251 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
349#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) 252
350 253#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)
351#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) 254#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)
352#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) 255#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)
353#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) 256#define XFS_BUF_SUPER_STALE(bp) do { \
354 257 XFS_BUF_STALE(bp); \
355#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE) 258 xfs_buf_delwri_dequeue(bp); \
356#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE) 259 XFS_BUF_DONE(bp); \
357#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE) 260 } while (0)
358
359#define XFS_BUF_BUSY(x) do { } while (0)
360#define XFS_BUF_UNBUSY(x) do { } while (0)
361#define XFS_BUF_ISBUSY(x) (1)
362
363#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
364#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
365#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
366
367#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
368#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
369#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)
370
371#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
372#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
373#define XFS_BUF_ISSHUT(x) (0)
374
375#define XFS_BUF_HOLD(x) pagebuf_hold(x)
376#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
377#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
378#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
379
380#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
381#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
382#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
383
384#define XFS_BUF_ISUNINITIAL(x) (0)
385#define XFS_BUF_UNUNINITIAL(x) (0)
386
387#define XFS_BUF_BP_ISMAPPED(bp) 1
388
389#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
390#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
391 (buf)->pb_iodone = (func)
392#define XFS_BUF_CLR_IODONE_FUNC(buf) \
393 (buf)->pb_iodone = NULL
394#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
395 (buf)->pb_strat = (func)
396#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
397 (buf)->pb_strat = NULL
398
399#define XFS_BUF_FSPRIVATE(buf, type) \
400 ((type)(buf)->pb_fspriv)
401#define XFS_BUF_SET_FSPRIVATE(buf, value) \
402 (buf)->pb_fspriv = (void *)(value)
403#define XFS_BUF_FSPRIVATE2(buf, type) \
404 ((type)(buf)->pb_fspriv2)
405#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
406 (buf)->pb_fspriv2 = (void *)(value)
407#define XFS_BUF_FSPRIVATE3(buf, type) \
408 ((type)(buf)->pb_fspriv3)
409#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
410 (buf)->pb_fspriv3 = (void *)(value)
411#define XFS_BUF_SET_START(buf)
412
413#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
414 (buf)->pb_relse = (value)
415
416#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
417
418static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
419{
420 if (bp->pb_flags & PBF_MAPPED)
421 return XFS_BUF_PTR(bp) + offset;
422 return (xfs_caddr_t) pagebuf_offset(bp, offset);
423}
424 261
425#define XFS_BUF_SET_PTR(bp, val, count) \ 262#define XFS_BUF_MANAGE XBF_FS_MANAGED
426 pagebuf_associate_memory(bp, val, count) 263#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
427#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) 264
428#define XFS_BUF_SET_ADDR(bp, blk) \ 265#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
429 ((bp)->pb_bn = (xfs_daddr_t)(blk)) 266#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
430#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) 267#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
431#define XFS_BUF_SET_OFFSET(bp, off) \ 268
432 ((bp)->pb_file_offset = (off)) 269#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)
433#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) 270#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)
434#define XFS_BUF_SET_COUNT(bp, cnt) \ 271#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0)
435 ((bp)->pb_count_desired = (cnt)) 272
436#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) 273#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
437#define XFS_BUF_SET_SIZE(bp, cnt) \ 274#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
438 ((bp)->pb_buffer_length = (cnt)) 275#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
439#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) 276
440#define XFS_BUF_SET_VTYPE(bp, type) 277#define XFS_BUF_BUSY(bp) do { } while (0)
441#define XFS_BUF_SET_REF(bp, ref) 278#define XFS_BUF_UNBUSY(bp) do { } while (0)
442 279#define XFS_BUF_ISBUSY(bp) (1)
443#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) 280
444 281#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
445#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) 282#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
446#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) 283#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
447#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) 284
448#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) 285#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
449#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); 286#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
450 287#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
451/* setup the buffer target from a buftarg structure */ 288
452#define XFS_BUF_SET_TARGET(bp, target) \ 289#define XFS_BUF_SHUT(bp) do { } while (0)
453 (bp)->pb_target = (target) 290#define XFS_BUF_UNSHUT(bp) do { } while (0)
454#define XFS_BUF_TARGET(bp) ((bp)->pb_target) 291#define XFS_BUF_ISSHUT(bp) (0)
455#define XFS_BUFTARG_NAME(target) \ 292
456 pagebuf_target_name(target) 293#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
457 294#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
458#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) 295#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
459#define XFS_BUF_SET_VTYPE(bp, type) 296#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
460#define XFS_BUF_SET_REF(bp, ref) 297
461 298#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
462static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) 299#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
300#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
301
302#define XFS_BUF_ISUNINITIAL(bp) (0)
303#define XFS_BUF_UNUNINITIAL(bp) (0)
304
305#define XFS_BUF_BP_ISMAPPED(bp) (1)
306
307#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
308#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
309#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
310#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
311#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
312
313#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
314#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
315#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
316#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
317#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
318#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
319#define XFS_BUF_SET_START(bp) do { } while (0)
320#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
321
322#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
323#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
324#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
325#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
326#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
327#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
328#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
329#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
330#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
331#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
332
333#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
334#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
335#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
336
337#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
338
339#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
340#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
341#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
342#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
343#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
344
345#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
346#define XFS_BUF_TARGET(bp) ((bp)->b_target)
347#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
348
349static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
463{ 350{
464 bp->pb_fspriv3 = mp; 351 bp->b_fspriv3 = mp;
465 bp->pb_strat = xfs_bdstrat_cb; 352 bp->b_strat = xfs_bdstrat_cb;
466 pagebuf_delwri_dequeue(bp); 353 xfs_buf_delwri_dequeue(bp);
467 return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); 354 return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
468} 355}
469 356
470static inline void xfs_buf_relse(xfs_buf_t *bp) 357static inline void xfs_buf_relse(xfs_buf_t *bp)
471{ 358{
472 if (!bp->pb_relse) 359 if (!bp->b_relse)
473 pagebuf_unlock(bp); 360 xfs_buf_unlock(bp);
474 pagebuf_rele(bp); 361 xfs_buf_rele(bp);
475} 362}
476 363
477#define xfs_bpin(bp) pagebuf_pin(bp) 364#define xfs_bpin(bp) xfs_buf_pin(bp)
478#define xfs_bunpin(bp) pagebuf_unpin(bp) 365#define xfs_bunpin(bp) xfs_buf_unpin(bp)
479 366
480#define xfs_buftrace(id, bp) \ 367#define xfs_buftrace(id, bp) \
481 pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) 368 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
482 369
483#define xfs_biodone(pb) \ 370#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
484 pagebuf_iodone(pb, 0)
485 371
486#define xfs_biomove(pb, off, len, data, rw) \ 372#define xfs_biomove(bp, off, len, data, rw) \
487 pagebuf_iomove((pb), (off), (len), (data), \ 373 xfs_buf_iomove((bp), (off), (len), (data), \
488 ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) 374 ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
489 375
490#define xfs_biozero(pb, off, len) \ 376#define xfs_biozero(bp, off, len) \
491 pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) 377 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
492 378
493 379
494static inline int XFS_bwrite(xfs_buf_t *pb) 380static inline int XFS_bwrite(xfs_buf_t *bp)
495{ 381{
496 int iowait = (pb->pb_flags & PBF_ASYNC) == 0; 382 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
497 int error = 0; 383 int error = 0;
498 384
499 if (!iowait) 385 if (!iowait)
500 pb->pb_flags |= _PBF_RUN_QUEUES; 386 bp->b_flags |= _XBF_RUN_QUEUES;
501 387
502 pagebuf_delwri_dequeue(pb); 388 xfs_buf_delwri_dequeue(bp);
503 pagebuf_iostrategy(pb); 389 xfs_buf_iostrategy(bp);
504 if (iowait) { 390 if (iowait) {
505 error = pagebuf_iowait(pb); 391 error = xfs_buf_iowait(bp);
506 xfs_buf_relse(pb); 392 xfs_buf_relse(bp);
507 } 393 }
508 return error; 394 return error;
509} 395}
510 396
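Everything in XFS_bwrite() above keys off XBF_ASYNC: with it clear, the call blocks in xfs_buf_iowait() and releases the buffer itself; with it set, the call adds _XBF_RUN_QUEUES, returns without waiting, and the completion path owns the reference. Caller sketch, illustrative only:

    /* synchronous: XFS_bwrite() waits and releases bp itself */
    error = XFS_bwrite(bp);

    /* asynchronous: mark first; do not touch bp after submission */
    XFS_BUF_ASYNC(bp);
    (void) XFS_bwrite(bp);                  /* returns 0 without waiting */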
511#define XFS_bdwrite(pb) \ 397#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
512 pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
513 398
514static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) 399static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
515{ 400{
516 bp->pb_strat = xfs_bdstrat_cb; 401 bp->b_strat = xfs_bdstrat_cb;
517 bp->pb_fspriv3 = mp; 402 bp->b_fspriv3 = mp;
518 403 return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
519 return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
520} 404}
521 405
522#define XFS_bdstrat(bp) pagebuf_iorequest(bp) 406#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
523 407
524#define xfs_iowait(pb) pagebuf_iowait(pb) 408#define xfs_iowait(bp) xfs_buf_iowait(bp)
525 409
526#define xfs_baread(target, rablkno, ralen) \ 410#define xfs_baread(target, rablkno, ralen) \
527 pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) 411 xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
528
529#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
530#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
531#define xfs_buf_free(bp) pagebuf_free(bp)
532 412
533 413
534/* 414/*
535 * Handling of buftargs. 415 * Handling of buftargs.
536 */ 416 */
537
538extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 417extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
539extern void xfs_free_buftarg(xfs_buftarg_t *, int); 418extern void xfs_free_buftarg(xfs_buftarg_t *, int);
540extern void xfs_wait_buftarg(xfs_buftarg_t *); 419extern void xfs_wait_buftarg(xfs_buftarg_t *);
541extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 420extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
542extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 421extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
543 422
544#define xfs_getsize_buftarg(buftarg) \ 423#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
545 block_size((buftarg)->pbr_bdev) 424#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
546#define xfs_readonly_buftarg(buftarg) \ 425
547 bdev_read_only((buftarg)->pbr_bdev) 426#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
548#define xfs_binval(buftarg) \ 427#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
549 xfs_flush_buftarg(buftarg, 1)
550#define XFS_bflush(buftarg) \
551 xfs_flush_buftarg(buftarg, 1)
552 428
553#endif /* __XFS_BUF_H__ */ 429#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 06111d0bbae4..ced4404339c7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -509,16 +509,14 @@ linvfs_open_exec(
509 vnode_t *vp = LINVFS_GET_VP(inode); 509 vnode_t *vp = LINVFS_GET_VP(inode);
510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); 510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
511 int error = 0; 511 int error = 0;
512 bhv_desc_t *bdp;
513 xfs_inode_t *ip; 512 xfs_inode_t *ip;
514 513
515 if (vp->v_vfsp->vfs_flag & VFS_DMI) { 514 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
516 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); 515 ip = xfs_vtoi(vp);
517 if (!bdp) { 516 if (!ip) {
518 error = -EINVAL; 517 error = -EINVAL;
519 goto open_exec_out; 518 goto open_exec_out;
520 } 519 }
521 ip = XFS_BHVTOI(bdp);
522 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { 520 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
523 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 521 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
524 0, 0, 0, NULL); 522 0, 0, 0, NULL);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 21667ba6dcd5..4db47790415c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -146,13 +146,10 @@ xfs_find_handle(
146 146
147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { 147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
148 xfs_inode_t *ip; 148 xfs_inode_t *ip;
149 bhv_desc_t *bhv;
150 int lock_mode; 149 int lock_mode;
151 150
152 /* need to get access to the xfs_inode to read the generation */ 151 /* need to get access to the xfs_inode to read the generation */
153 bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); 152 ip = xfs_vtoi(vp);
154 ASSERT(bhv);
155 ip = XFS_BHVTOI(bhv);
156 ASSERT(ip); 153 ASSERT(ip);
157 lock_mode = xfs_ilock_map_shared(ip); 154 lock_mode = xfs_ilock_map_shared(ip);
158 155
@@ -751,9 +748,8 @@ xfs_ioctl(
751 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 748 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
752 mp->m_rtdev_targp : mp->m_ddev_targp; 749 mp->m_rtdev_targp : mp->m_ddev_targp;
753 750
754 da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; 751 da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
755 /* The size dio will do in one go */ 752 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
756 da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
757 753
758 if (copy_to_user(arg, &da, sizeof(da))) 754 if (copy_to_user(arg, &da, sizeof(da)))
759 return -XFS_ERROR(EFAULT); 755 return -XFS_ERROR(EFAULT);
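The d_maxiosz change above replaces a hard 64-page cap with INT_MAX rounded down to a multiple of the minimum I/O size, i.e. the largest miniosz-aligned value that still fits in an int. A runnable check of the arithmetic for a 512-byte sector target (the sector size is an assumption):

    #include <assert.h>
    #include <limits.h>

    int main(void)
    {
            int miniosz = 512;                      /* example sector size */
            int maxiosz = INT_MAX & ~(miniosz - 1); /* round down to multiple */

            assert(maxiosz % miniosz == 0);
            assert(maxiosz == 0x7ffffe00);          /* just under 2 GiB */
            return 0;
    }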
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9b8ee3470ecc..4bd3d03b23ed 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -54,11 +54,46 @@
54#include <linux/capability.h> 54#include <linux/capability.h>
55#include <linux/xattr.h> 55#include <linux/xattr.h>
56#include <linux/namei.h> 56#include <linux/namei.h>
57#include <linux/security.h>
57 58
58#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \ 59#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \
59 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME)) 60 (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))
60 61
61/* 62/*
63 * Get a XFS inode from a given vnode.
64 */
65xfs_inode_t *
66xfs_vtoi(
67 struct vnode *vp)
68{
69 bhv_desc_t *bdp;
70
71 bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
72 VNODE_POSITION_XFS, VNODE_POSITION_XFS);
73 if (unlikely(bdp == NULL))
74 return NULL;
75 return XFS_BHVTOI(bdp);
76}
77
78/*
79 * Bring the atime in the XFS inode uptodate.
80 * Used before logging the inode to disk or when the Linux inode goes away.
81 */
82void
83xfs_synchronize_atime(
84 xfs_inode_t *ip)
85{
86 vnode_t *vp;
87
88 vp = XFS_ITOV_NULL(ip);
89 if (vp) {
90 struct inode *inode = &vp->v_inode;
91 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
92 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
93 }
94}
95
96/*
62 * Change the requested timestamp in the given inode. 97 * Change the requested timestamp in the given inode.
63 * We don't lock across timestamp updates, and we don't log them but 98 * We don't lock across timestamp updates, and we don't log them but
64 * we do record the fact that there is dirty information in core. 99 * we do record the fact that there is dirty information in core.
@@ -77,23 +112,6 @@ xfs_ichgtime(
77 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); 112 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
78 timespec_t tv; 113 timespec_t tv;
79 114
80 /*
81 * We're not supposed to change timestamps in readonly-mounted
82 * filesystems. Throw it away if anyone asks us.
83 */
84 if (unlikely(IS_RDONLY(inode)))
85 return;
86
87 /*
88 * Don't update access timestamps on reads if mounted "noatime".
89 * Throw it away if anyone asks us.
90 */
91 if (unlikely(
92 (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
93 (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
94 XFS_ICHGTIME_ACC))
95 return;
96
97 nanotime(&tv); 115 nanotime(&tv);
98 if (flags & XFS_ICHGTIME_MOD) { 116 if (flags & XFS_ICHGTIME_MOD) {
99 inode->i_mtime = tv; 117 inode->i_mtime = tv;
@@ -130,8 +148,6 @@ xfs_ichgtime(
130 * Variant on the above which avoids querying the system clock 148 * Variant on the above which avoids querying the system clock
131 * in situations where we know the Linux inode timestamps have 149 * in situations where we know the Linux inode timestamps have
132 * just been updated (and so we can update our inode cheaply). 150 * just been updated (and so we can update our inode cheaply).
133 * We also skip the readonly and noatime checks here, they are
134 * also catered for already.
135 */ 151 */
136void 152void
137xfs_ichgtime_fast( 153xfs_ichgtime_fast(
@@ -142,20 +158,16 @@ xfs_ichgtime_fast(
142 timespec_t *tvp; 158 timespec_t *tvp;
143 159
144 /* 160 /*
145 * We're not supposed to change timestamps in readonly-mounted 161 * Atime updates for read() & friends are handled lazily now, and
146 * filesystems. Throw it away if anyone asks us. 162 * explicit updates must go through xfs_ichgtime()
147 */ 163 */
148 if (unlikely(IS_RDONLY(inode))) 164 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
149 return;
150 165
151 /* 166 /*
152 * Don't update access timestamps on reads if mounted "noatime". 167 * We're not supposed to change timestamps in readonly-mounted
153 * Throw it away if anyone asks us. 168 * filesystems. Throw it away if anyone asks us.
154 */ 169 */
155 if (unlikely( 170 if (unlikely(IS_RDONLY(inode)))
156 (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
157 ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
158 XFS_ICHGTIME_ACC)))
159 return; 171 return;
160 172
161 if (flags & XFS_ICHGTIME_MOD) { 173 if (flags & XFS_ICHGTIME_MOD) {
@@ -163,11 +175,6 @@ xfs_ichgtime_fast(
163 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; 175 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
164 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; 176 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
165 } 177 }
166 if (flags & XFS_ICHGTIME_ACC) {
167 tvp = &inode->i_atime;
168 ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
169 ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
170 }
171 if (flags & XFS_ICHGTIME_CHG) { 178 if (flags & XFS_ICHGTIME_CHG) {
172 tvp = &inode->i_ctime; 179 tvp = &inode->i_ctime;
173 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; 180 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -214,6 +221,39 @@ validate_fields(
214} 221}
215 222
216/* 223/*
224 * Hook in SELinux. This is not quite correct yet; what we really need
225 * here (as we do for default ACLs) is a mechanism by which creation of
226 * these attrs can be journalled at inode creation time (along with the
227 * inode, of course, such that log replay can't cause these to be lost).
228 */
229STATIC int
230linvfs_init_security(
231 struct vnode *vp,
232 struct inode *dir)
233{
234 struct inode *ip = LINVFS_GET_IP(vp);
235 size_t length;
236 void *value;
237 char *name;
238 int error;
239
240 error = security_inode_init_security(ip, dir, &name, &value, &length);
241 if (error) {
242 if (error == -EOPNOTSUPP)
243 return 0;
244 return -error;
245 }
246
247 VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
248 if (!error)
249 VMODIFY(vp);
250
251 kfree(name);
252 kfree(value);
253 return error;
254}
255
256/*
217 * Determine whether a process has a valid fs_struct (kernel daemons 257 * Determine whether a process has a valid fs_struct (kernel daemons
218 * like knfsd don't have an fs_struct). 258 * like knfsd don't have an fs_struct).
219 * 259 *
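One subtlety in linvfs_init_security() above is the error-sign bridge: security_inode_init_security() follows the kernel convention of returning negative errno values, while XFS passes positive error codes around internally, hence the `return -error` flip; -EOPNOTSUPP just means no security module wants an xattr and is treated as success. A small compilable sketch of that convention bridge (the hook here is a hypothetical stub):

#include <errno.h>
#include <stdio.h>

static int lsm_hook_stub(void)
{
	return -EOPNOTSUPP;	/* stand-in for security_inode_init_security() */
}

static int init_security_sketch(void)
{
	int error = lsm_hook_stub();

	if (error) {
		if (error == -EOPNOTSUPP)	/* no LSM loaded: not a failure */
			return 0;
		return -error;			/* flip to the positive convention */
	}
	return 0;	/* would set the returned xattr here */
}

int main(void)
{
	printf("result: %d\n", init_security_sketch());
	return 0;
}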
@@ -278,6 +318,9 @@ linvfs_mknod(
278 break; 318 break;
279 } 319 }
280 320
321 if (!error)
322 error = linvfs_init_security(vp, dir);
323
281 if (default_acl) { 324 if (default_acl) {
282 if (!error) { 325 if (!error) {
283 error = _ACL_INHERIT(vp, &va, default_acl); 326 error = _ACL_INHERIT(vp, &va, default_acl);
@@ -294,8 +337,6 @@ linvfs_mknod(
294 teardown.d_inode = ip = LINVFS_GET_IP(vp); 337 teardown.d_inode = ip = LINVFS_GET_IP(vp);
295 teardown.d_name = dentry->d_name; 338 teardown.d_name = dentry->d_name;
296 339
297 vn_mark_bad(vp);
298
299 if (S_ISDIR(mode)) 340 if (S_ISDIR(mode))
300 VOP_RMDIR(dvp, &teardown, NULL, err2); 341 VOP_RMDIR(dvp, &teardown, NULL, err2);
301 else 342 else
@@ -506,7 +547,7 @@ linvfs_follow_link(
506 ASSERT(dentry); 547 ASSERT(dentry);
507 ASSERT(nd); 548 ASSERT(nd);
508 549
509 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); 550 link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
510 if (!link) { 551 if (!link) {
511 nd_set_link(nd, ERR_PTR(-ENOMEM)); 552 nd_set_link(nd, ERR_PTR(-ENOMEM));
512 return NULL; 553 return NULL;
@@ -522,12 +563,12 @@ linvfs_follow_link(
522 vp = LINVFS_GET_VP(dentry->d_inode); 563 vp = LINVFS_GET_VP(dentry->d_inode);
523 564
524 iov.iov_base = link; 565 iov.iov_base = link;
525 iov.iov_len = MAXNAMELEN; 566 iov.iov_len = MAXPATHLEN;
526 567
527 uio->uio_iov = &iov; 568 uio->uio_iov = &iov;
528 uio->uio_offset = 0; 569 uio->uio_offset = 0;
529 uio->uio_segflg = UIO_SYSSPACE; 570 uio->uio_segflg = UIO_SYSSPACE;
530 uio->uio_resid = MAXNAMELEN; 571 uio->uio_resid = MAXPATHLEN;
531 uio->uio_iovcnt = 1; 572 uio->uio_iovcnt = 1;
532 573
533 VOP_READLINK(vp, uio, 0, NULL, error); 574 VOP_READLINK(vp, uio, 0, NULL, error);
@@ -535,7 +576,7 @@ linvfs_follow_link(
535 kfree(link); 576 kfree(link);
536 link = ERR_PTR(-error); 577 link = ERR_PTR(-error);
537 } else { 578 } else {
538 link[MAXNAMELEN - uio->uio_resid] = '\0'; 579 link[MAXPATHLEN - uio->uio_resid] = '\0';
539 } 580 }
540 kfree(uio); 581 kfree(uio);
541 582
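The MAXNAMELEN to MAXPATHLEN switch above matters because a symlink target is a full path (bounded by MAXPATHLEN), not a single directory-entry component (bounded by MAXNAMELEN), so the old buffer could truncate long targets. The terminating NUL lands at MAXPATHLEN - uio_resid, i.e. exactly the number of bytes the read consumed. A userspace sketch of that resid-based termination, with a hypothetical 16-byte cap standing in for MAXPATHLEN:

#include <stdio.h>
#include <string.h>

#define CAP 16	/* stand-in for MAXPATHLEN */

int main(void)
{
	char link[CAP + 1];
	const char *target = "/tmp/dest";
	size_t resid = CAP - strlen(target);	/* bytes the "read" left over */

	memcpy(link, target, strlen(target));
	link[CAP - resid] = '\0';		/* terminate at bytes consumed */
	printf("%s\n", link);			/* prints /tmp/dest */
	return 0;
}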
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ee784b63acbf..6899a6b4a50a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
26extern struct file_operations linvfs_invis_file_operations; 26extern struct file_operations linvfs_invis_file_operations;
27extern struct file_operations linvfs_dir_operations; 27extern struct file_operations linvfs_dir_operations;
28 28
29extern struct address_space_operations linvfs_aops;
30
31extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
32extern void linvfs_unwritten_done(struct buffer_head *, int);
33
34extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, 29extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
35 int, unsigned int, void __user *); 30 int, unsigned int, void __user *);
36 31
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d8e21ba0cccc..67389b745526 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -110,10 +110,6 @@
110 * delalloc and these ondisk-uninitialised buffers. 110 * delalloc and these ondisk-uninitialised buffers.
111 */ 111 */
112BUFFER_FNS(PrivateStart, unwritten); 112BUFFER_FNS(PrivateStart, unwritten);
113static inline void set_buffer_unwritten_io(struct buffer_head *bh)
114{
115 bh->b_end_io = linvfs_unwritten_done;
116}
117 113
118#define restricted_chown xfs_params.restrict_chown.val 114#define restricted_chown xfs_params.restrict_chown.val
119#define irix_sgid_inherit xfs_params.sgid_inherit.val 115#define irix_sgid_inherit xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
232#define xfs_itruncate_data(ip, off) \ 228#define xfs_itruncate_data(ip, off) \
233 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) 229 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
234#define xfs_statvfs_fsid(statp, mp) \ 230#define xfs_statvfs_fsid(statp, mp) \
235 ({ u64 id = huge_encode_dev((mp)->m_dev); \ 231 ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
236 __kernel_fsid_t *fsid = &(statp)->f_fsid; \ 232 __kernel_fsid_t *fsid = &(statp)->f_fsid; \
237 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) 233 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
238 234
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 885dfafeabee..e0ab45fbfebd 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -233,8 +233,8 @@ xfs_read(
233 xfs_buftarg_t *target = 233 xfs_buftarg_t *target =
234 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 234 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
235 mp->m_rtdev_targp : mp->m_ddev_targp; 235 mp->m_rtdev_targp : mp->m_ddev_targp;
236 if ((*offset & target->pbr_smask) || 236 if ((*offset & target->bt_smask) ||
237 (size & target->pbr_smask)) { 237 (size & target->bt_smask)) {
238 if (*offset == ip->i_d.di_size) { 238 if (*offset == ip->i_d.di_size) {
239 return (0); 239 return (0);
240 } 240 }
@@ -281,9 +281,6 @@ xfs_read(
281 281
282 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 282 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
283 283
284 if (likely(!(ioflags & IO_INVIS)))
285 xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
286
287unlock_isem: 284unlock_isem:
288 if (unlikely(ioflags & IO_ISDIRECT)) 285 if (unlikely(ioflags & IO_ISDIRECT))
289 mutex_unlock(&inode->i_mutex); 286 mutex_unlock(&inode->i_mutex);
@@ -346,9 +343,6 @@ xfs_sendfile(
346 if (ret > 0) 343 if (ret > 0)
347 XFS_STATS_ADD(xs_read_bytes, ret); 344 XFS_STATS_ADD(xs_read_bytes, ret);
348 345
349 if (likely(!(ioflags & IO_INVIS)))
350 xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
351
352 return ret; 346 return ret;
353} 347}
354 348
@@ -362,7 +356,6 @@ STATIC int /* error (positive) */
362xfs_zero_last_block( 356xfs_zero_last_block(
363 struct inode *ip, 357 struct inode *ip,
364 xfs_iocore_t *io, 358 xfs_iocore_t *io,
365 xfs_off_t offset,
366 xfs_fsize_t isize, 359 xfs_fsize_t isize,
367 xfs_fsize_t end_size) 360 xfs_fsize_t end_size)
368{ 361{
@@ -371,19 +364,16 @@ xfs_zero_last_block(
371 int nimaps; 364 int nimaps;
372 int zero_offset; 365 int zero_offset;
373 int zero_len; 366 int zero_len;
374 int isize_fsb_offset;
375 int error = 0; 367 int error = 0;
376 xfs_bmbt_irec_t imap; 368 xfs_bmbt_irec_t imap;
377 loff_t loff; 369 loff_t loff;
378 size_t lsize;
379 370
380 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); 371 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
381 ASSERT(offset > isize);
382 372
383 mp = io->io_mount; 373 mp = io->io_mount;
384 374
385 isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); 375 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
386 if (isize_fsb_offset == 0) { 376 if (zero_offset == 0) {
387 /* 377 /*
388 * There are no extra bytes in the last block on disk to 378 * There are no extra bytes in the last block on disk to
389 * zero, so return. 379 * zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
413 */ 403 */
414 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 404 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
415 loff = XFS_FSB_TO_B(mp, last_fsb); 405 loff = XFS_FSB_TO_B(mp, last_fsb);
416 lsize = XFS_FSB_TO_B(mp, 1);
417 406
418 zero_offset = isize_fsb_offset; 407 zero_len = mp->m_sb.sb_blocksize - zero_offset;
419 zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
420 408
421 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); 409 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
422 410
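The arithmetic above is the whole trick in the simplified xfs_zero_last_block(): the offset of the old EOF within its filesystem block says whether the block has a tail to zero, and the zeroing runs from the EOF to the block boundary. A worked sketch with a hypothetical 4096-byte block size:

#include <stdio.h>

int main(void)
{
	unsigned long blocksize = 4096;
	unsigned long long isize = 10000;	/* current on-disk EOF */

	unsigned long zero_offset = isize % blocksize;	/* XFS_B_FSB_OFFSET */
	if (zero_offset == 0) {
		puts("EOF is block aligned, nothing to zero");
		return 0;
	}
	unsigned long zero_len = blocksize - zero_offset;
	printf("zero %lu bytes starting at file offset %llu\n", zero_len, isize);
	return 0;
}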
@@ -447,20 +435,17 @@ xfs_zero_eof(
447 struct inode *ip = LINVFS_GET_IP(vp); 435 struct inode *ip = LINVFS_GET_IP(vp);
448 xfs_fileoff_t start_zero_fsb; 436 xfs_fileoff_t start_zero_fsb;
449 xfs_fileoff_t end_zero_fsb; 437 xfs_fileoff_t end_zero_fsb;
450 xfs_fileoff_t prev_zero_fsb;
451 xfs_fileoff_t zero_count_fsb; 438 xfs_fileoff_t zero_count_fsb;
452 xfs_fileoff_t last_fsb; 439 xfs_fileoff_t last_fsb;
453 xfs_extlen_t buf_len_fsb; 440 xfs_extlen_t buf_len_fsb;
454 xfs_extlen_t prev_zero_count;
455 xfs_mount_t *mp; 441 xfs_mount_t *mp;
456 int nimaps; 442 int nimaps;
457 int error = 0; 443 int error = 0;
458 xfs_bmbt_irec_t imap; 444 xfs_bmbt_irec_t imap;
459 loff_t loff;
460 size_t lsize;
461 445
462 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 446 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
463 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 447 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
448 ASSERT(offset > isize);
464 449
465 mp = io->io_mount; 450 mp = io->io_mount;
466 451
@@ -468,7 +453,7 @@ xfs_zero_eof(
468 * First handle zeroing the block on which isize resides. 453 * First handle zeroing the block on which isize resides.
469 * We only zero a part of that block so it is handled specially. 454 * We only zero a part of that block so it is handled specially.
470 */ 455 */
471 error = xfs_zero_last_block(ip, io, offset, isize, end_size); 456 error = xfs_zero_last_block(ip, io, isize, end_size);
472 if (error) { 457 if (error) {
473 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 458 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
474 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 459 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
496 } 481 }
497 482
498 ASSERT(start_zero_fsb <= end_zero_fsb); 483 ASSERT(start_zero_fsb <= end_zero_fsb);
499 prev_zero_fsb = NULLFILEOFF;
500 prev_zero_count = 0;
501 while (start_zero_fsb <= end_zero_fsb) { 484 while (start_zero_fsb <= end_zero_fsb) {
502 nimaps = 1; 485 nimaps = 1;
503 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 486 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
519 * that sits on a hole and sets the page as P_HOLE 502 * that sits on a hole and sets the page as P_HOLE
520 * and calls remapf if it is a mapped file. 503 * and calls remapf if it is a mapped file.
521 */ 504 */
522 prev_zero_fsb = NULLFILEOFF; 505 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
523 prev_zero_count = 0;
524 start_zero_fsb = imap.br_startoff +
525 imap.br_blockcount;
526 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 506 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
527 continue; 507 continue;
528 } 508 }
@@ -543,17 +523,15 @@ xfs_zero_eof(
543 */ 523 */
544 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 524 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
545 525
546 loff = XFS_FSB_TO_B(mp, start_zero_fsb); 526 error = xfs_iozero(ip,
547 lsize = XFS_FSB_TO_B(mp, buf_len_fsb); 527 XFS_FSB_TO_B(mp, start_zero_fsb),
548 528 XFS_FSB_TO_B(mp, buf_len_fsb),
549 error = xfs_iozero(ip, loff, lsize, end_size); 529 end_size);
550 530
551 if (error) { 531 if (error) {
552 goto out_lock; 532 goto out_lock;
553 } 533 }
554 534
555 prev_zero_fsb = start_zero_fsb;
556 prev_zero_count = buf_len_fsb;
557 start_zero_fsb = imap.br_startoff + buf_len_fsb; 535 start_zero_fsb = imap.br_startoff + buf_len_fsb;
558 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 536 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
559 537
@@ -640,7 +618,7 @@ xfs_write(
640 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 618 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
641 mp->m_rtdev_targp : mp->m_ddev_targp; 619 mp->m_rtdev_targp : mp->m_ddev_targp;
642 620
643 if ((pos & target->pbr_smask) || (count & target->pbr_smask)) 621 if ((pos & target->bt_smask) || (count & target->bt_smask))
644 return XFS_ERROR(-EINVAL); 622 return XFS_ERROR(-EINVAL);
645 623
646 if (!VN_CACHED(vp) && pos < i_size_read(inode)) 624 if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -831,6 +809,10 @@ retry:
831 goto retry; 809 goto retry;
832 } 810 }
833 811
812 isize = i_size_read(inode);
813 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
814 *offset = isize;
815
834 if (*offset > xip->i_d.di_size) { 816 if (*offset > xip->i_d.di_size) {
835 xfs_ilock(xip, XFS_ILOCK_EXCL); 817 xfs_ilock(xip, XFS_ILOCK_EXCL);
836 if (*offset > xip->i_d.di_size) { 818 if (*offset > xip->i_d.di_size) {
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
956 938
957 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); 939 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
958 if (!XFS_FORCED_SHUTDOWN(mp)) { 940 if (!XFS_FORCED_SHUTDOWN(mp)) {
959 pagebuf_iorequest(bp); 941 xfs_buf_iorequest(bp);
960 return 0; 942 return 0;
961 } else { 943 } else {
962 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 944 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
1009 * if (XFS_BUF_IS_GRIO(bp)) { 991 * if (XFS_BUF_IS_GRIO(bp)) {
1010 */ 992 */
1011 993
1012 pagebuf_iorequest(bp); 994 xfs_buf_iorequest(bp);
1013 return 0; 995 return 0;
1014 } 996 }
1015 997
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 6c40a74be7c8..8955720a2c6b 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -34,7 +34,7 @@ xfs_read_xfsstats(
34 __uint64_t xs_write_bytes = 0; 34 __uint64_t xs_write_bytes = 0;
35 __uint64_t xs_read_bytes = 0; 35 __uint64_t xs_read_bytes = 0;
36 36
37 static struct xstats_entry { 37 static const struct xstats_entry {
38 char *desc; 38 char *desc;
39 int endpoint; 39 int endpoint;
40 } xstats[] = { 40 } xstats[] = {
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 50027c4a5618..8ba7a2fa6c1d 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -109,15 +109,15 @@ struct xfsstats {
109 __uint32_t vn_remove; /* # times vn_remove called */ 109 __uint32_t vn_remove; /* # times vn_remove called */
110 __uint32_t vn_free; /* # times vn_free called */ 110 __uint32_t vn_free; /* # times vn_free called */
111#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) 111#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
112 __uint32_t pb_get; 112 __uint32_t xb_get;
113 __uint32_t pb_create; 113 __uint32_t xb_create;
114 __uint32_t pb_get_locked; 114 __uint32_t xb_get_locked;
115 __uint32_t pb_get_locked_waited; 115 __uint32_t xb_get_locked_waited;
116 __uint32_t pb_busy_locked; 116 __uint32_t xb_busy_locked;
117 __uint32_t pb_miss_locked; 117 __uint32_t xb_miss_locked;
118 __uint32_t pb_page_retries; 118 __uint32_t xb_page_retries;
119 __uint32_t pb_page_found; 119 __uint32_t xb_page_found;
120 __uint32_t pb_get_read; 120 __uint32_t xb_get_read;
121/* Extra precision counters */ 121/* Extra precision counters */
122 __uint64_t xs_xstrat_bytes; 122 __uint64_t xs_xstrat_bytes;
123 __uint64_t xs_write_bytes; 123 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 6116b5bf433e..f22e426d9e42 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
306 xfs_fs_cmn_err(CE_NOTE, mp, 306 xfs_fs_cmn_err(CE_NOTE, mp,
307 "Disabling barriers, not supported with external log device"); 307 "Disabling barriers, not supported with external log device");
308 mp->m_flags &= ~XFS_MOUNT_BARRIER; 308 mp->m_flags &= ~XFS_MOUNT_BARRIER;
309 return;
309 } 310 }
310 311
311 if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered == 312 if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
312 QUEUE_ORDERED_NONE) { 313 QUEUE_ORDERED_NONE) {
313 xfs_fs_cmn_err(CE_NOTE, mp, 314 xfs_fs_cmn_err(CE_NOTE, mp,
314 "Disabling barriers, not supported by the underlying device"); 315 "Disabling barriers, not supported by the underlying device");
315 mp->m_flags &= ~XFS_MOUNT_BARRIER; 316 mp->m_flags &= ~XFS_MOUNT_BARRIER;
317 return;
316 } 318 }
317 319
318 error = xfs_barrier_test(mp); 320 error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
320 xfs_fs_cmn_err(CE_NOTE, mp, 322 xfs_fs_cmn_err(CE_NOTE, mp,
321 "Disabling barriers, trial barrier write failed"); 323 "Disabling barriers, trial barrier write failed");
322 mp->m_flags &= ~XFS_MOUNT_BARRIER; 324 mp->m_flags &= ~XFS_MOUNT_BARRIER;
325 return;
323 } 326 }
324} 327}
325 328
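The three added return statements turn xfs_mountfs_check_barriers() into a chain of guard clauses: once barriers are disabled for one reason, the function must not fall through and probe the next condition (in particular, it must not attempt the trial barrier write). A sketch of the resulting control flow, with illustrative stand-in conditions:

#include <stdbool.h>
#include <stdio.h>

static bool external_log, no_ordered_queue, trial_write_failed;

static void disable_barriers(const char *why)
{
	printf("Disabling barriers, %s\n", why);
}

static void check_barriers(void)
{
	if (external_log) {
		disable_barriers("not supported with external log device");
		return;		/* don't keep probing a disabled feature */
	}
	if (no_ordered_queue) {
		disable_barriers("not supported by the underlying device");
		return;
	}
	if (trial_write_failed) {
		disable_barriers("trial barrier write failed");
		return;
	}
}

int main(void)
{
	external_log = true;
	check_barriers();
	return 0;
}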
@@ -327,7 +330,7 @@ void
327xfs_blkdev_issue_flush( 330xfs_blkdev_issue_flush(
328 xfs_buftarg_t *buftarg) 331 xfs_buftarg_t *buftarg)
329{ 332{
330 blkdev_issue_flush(buftarg->pbr_bdev, NULL); 333 blkdev_issue_flush(buftarg->bt_bdev, NULL);
331} 334}
332 335
333STATIC struct inode * 336STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
576 timeleft = schedule_timeout_interruptible(timeleft); 579 timeleft = schedule_timeout_interruptible(timeleft);
577 /* swsusp */ 580 /* swsusp */
578 try_to_freeze(); 581 try_to_freeze();
579 if (kthread_should_stop()) 582 if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
580 break; 583 break;
581 584
582 spin_lock(&vfsp->vfs_sync_lock); 585 spin_lock(&vfsp->vfs_sync_lock);
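The xfssyncd change above makes the daemon honour a stop request only once its work list is empty, so sync items queued during unmount are not dropped. A minimal pthread sketch of that drain-before-exit pattern (kthread_should_stop() and list_empty() become a stop flag and a counter here):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static bool stop_requested;
static int pending_work = 3;

static void *syncd(void *arg)
{
	(void)arg;
	for (;;) {
		/* exit only when asked to stop AND nothing is queued */
		if (stop_requested && pending_work == 0)
			break;
		if (pending_work > 0) {
			pending_work--;	/* process one queued sync item */
			printf("flushed item, %d left\n", pending_work);
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	stop_requested = true;	/* the stop arrives with work still pending */
	pthread_create(&t, NULL, syncd, NULL);
	pthread_join(t, NULL);
	return 0;
}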
@@ -966,9 +969,9 @@ init_xfs_fs( void )
966 if (error < 0) 969 if (error < 0)
967 goto undo_zones; 970 goto undo_zones;
968 971
969 error = pagebuf_init(); 972 error = xfs_buf_init();
970 if (error < 0) 973 if (error < 0)
971 goto undo_pagebuf; 974 goto undo_buffers;
972 975
973 vn_init(); 976 vn_init();
974 xfs_init(); 977 xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
982 return 0; 985 return 0;
983 986
984undo_register: 987undo_register:
985 pagebuf_terminate(); 988 xfs_buf_terminate();
986 989
987undo_pagebuf: 990undo_buffers:
988 linvfs_destroy_zones(); 991 linvfs_destroy_zones();
989 992
990undo_zones: 993undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
998 XFS_DM_EXIT(&xfs_fs_type); 1001 XFS_DM_EXIT(&xfs_fs_type);
999 unregister_filesystem(&xfs_fs_type); 1002 unregister_filesystem(&xfs_fs_type);
1000 xfs_cleanup(); 1003 xfs_cleanup();
1001 pagebuf_terminate(); 1004 xfs_buf_terminate();
1002 linvfs_destroy_zones(); 1005 linvfs_destroy_zones();
1003 ktrace_uninit(); 1006 ktrace_uninit();
1004} 1007}
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index e9bbcb4d6243..260dd8415dd7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -106,7 +106,6 @@ vn_revalidate_core(
106 inode->i_blocks = vap->va_nblocks; 106 inode->i_blocks = vap->va_nblocks;
107 inode->i_mtime = vap->va_mtime; 107 inode->i_mtime = vap->va_mtime;
108 inode->i_ctime = vap->va_ctime; 108 inode->i_ctime = vap->va_ctime;
109 inode->i_atime = vap->va_atime;
110 inode->i_blksize = vap->va_blocksize; 109 inode->i_blksize = vap->va_blocksize;
111 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 110 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
112 inode->i_flags |= S_IMMUTABLE; 111 inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f2bbb327c081..0fe2419461d6 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp)
566} 566}
567 567
568/* 568/*
569 * Extracting atime values in various formats
570 */
571static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
572{
573 bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
574 bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
575}
576
577static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
578{
579 *ts = vp->v_inode.i_atime;
580}
581
582static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
583{
584 *tt = vp->v_inode.i_atime.tv_sec;
585}
586
587/*
569 * Some useful predicates. 588 * Some useful predicates.
570 */ 589 */
571#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) 590#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2f69822344e5..2ec6b441849c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
239 * trying to duplicate our effort. 239 * trying to duplicate our effort.
240 */ 240 */
241 ASSERT(qip->qli_pushbuf_flag != 0); 241 ASSERT(qip->qli_pushbuf_flag != 0);
242 ASSERT(qip->qli_push_owner == get_thread_id()); 242 ASSERT(qip->qli_push_owner == current_pid());
243 243
244 /* 244 /*
245 * If flushlock isn't locked anymore, chances are that the 245 * If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
333 qip->qli_pushbuf_flag = 1; 333 qip->qli_pushbuf_flag = 1;
334 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); 334 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
335#ifdef DEBUG 335#ifdef DEBUG
336 qip->qli_push_owner = get_thread_id(); 336 qip->qli_push_owner = current_pid();
337#endif 337#endif
338 /* 338 /*
339 * The dquot is left locked. 339 * The dquot is left locked.
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index bb6991a7a617..7dcdd0640c32 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1392,11 +1392,12 @@ xfs_qm_qino_alloc(
1392{ 1392{
1393 xfs_trans_t *tp; 1393 xfs_trans_t *tp;
1394 int error; 1394 int error;
1395 unsigned long s; 1395 unsigned long s;
1396 cred_t zerocr; 1396 cred_t zerocr;
1397 xfs_inode_t zeroino;
1397 int committed; 1398 int committed;
1398 1399
1399 tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE); 1400 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1400 if ((error = xfs_trans_reserve(tp, 1401 if ((error = xfs_trans_reserve(tp,
1401 XFS_QM_QINOCREATE_SPACE_RES(mp), 1402 XFS_QM_QINOCREATE_SPACE_RES(mp),
1402 XFS_CREATE_LOG_RES(mp), 0, 1403 XFS_CREATE_LOG_RES(mp), 0,
@@ -1406,8 +1407,9 @@ xfs_qm_qino_alloc(
1406 return (error); 1407 return (error);
1407 } 1408 }
1408 memset(&zerocr, 0, sizeof(zerocr)); 1409 memset(&zerocr, 0, sizeof(zerocr));
1410 memset(&zeroino, 0, sizeof(zeroino));
1409 1411
1410 if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0, 1412 if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
1411 &zerocr, 0, 1, ip, &committed))) { 1413 &zerocr, 0, 1, ip, &committed))) {
1412 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1414 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1413 XFS_TRANS_ABORT); 1415 XFS_TRANS_ABORT);
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
1918 * at this point (because we intentionally didn't in dqget_noattach). 1920 * at this point (because we intentionally didn't in dqget_noattach).
1919 */ 1921 */
1920 if (error) { 1922 if (error) {
1921 xfs_qm_dqpurge_all(mp, 1923 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1922 XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
1923 XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
1924 goto error_return; 1924 goto error_return;
1925 } 1925 }
1926 /* 1926 /*
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2743 xfs_dqunlock(udqp); 2743 xfs_dqunlock(udqp);
2744 ASSERT(ip->i_udquot == NULL); 2744 ASSERT(ip->i_udquot == NULL);
2745 ip->i_udquot = udqp; 2745 ip->i_udquot = udqp;
2746 ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
2746 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2747 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2747 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2748 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2748 } 2749 }
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2752 xfs_dqunlock(gdqp); 2753 xfs_dqunlock(gdqp);
2753 ASSERT(ip->i_gdquot == NULL); 2754 ASSERT(ip->i_gdquot == NULL);
2754 ip->i_gdquot = gdqp; 2755 ip->i_gdquot = gdqp;
2755 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); 2756 ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
2757 ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
2758 ip->i_d.di_gid : ip->i_d.di_projid) ==
2759 be32_to_cpu(gdqp->q_core.d_id));
2756 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2760 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2757 } 2761 }
2758} 2762}
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index bb6dc91ea261..b08b3d9345b7 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -27,45 +27,12 @@ static DEFINE_SPINLOCK(xfs_err_lock);
27/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ 27/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
28#define XFS_MAX_ERR_LEVEL 7 28#define XFS_MAX_ERR_LEVEL 7
29#define XFS_ERR_MASK ((1 << 3) - 1) 29#define XFS_ERR_MASK ((1 << 3) - 1)
30static char *err_level[XFS_MAX_ERR_LEVEL+1] = 30static const char * const err_level[XFS_MAX_ERR_LEVEL+1] =
31 {KERN_EMERG, KERN_ALERT, KERN_CRIT, 31 {KERN_EMERG, KERN_ALERT, KERN_CRIT,
32 KERN_ERR, KERN_WARNING, KERN_NOTICE, 32 KERN_ERR, KERN_WARNING, KERN_NOTICE,
33 KERN_INFO, KERN_DEBUG}; 33 KERN_INFO, KERN_DEBUG};
34 34
35void 35void
36assfail(char *a, char *f, int l)
37{
38 printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
39 BUG();
40}
41
42#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
43
44unsigned long
45random(void)
46{
47 static unsigned long RandomValue = 1;
48 /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
49 register long rv = RandomValue;
50 register long lo;
51 register long hi;
52
53 hi = rv / 127773;
54 lo = rv % 127773;
55 rv = 16807 * lo - 2836 * hi;
56 if( rv <= 0 ) rv += 2147483647;
57 return( RandomValue = rv );
58}
59
60int
61get_thread_id(void)
62{
63 return current->pid;
64}
65
66#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
67
68void
69cmn_err(register int level, char *fmt, ...) 36cmn_err(register int level, char *fmt, ...)
70{ 37{
71 char *fp = fmt; 38 char *fp = fmt;
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
90 BUG(); 57 BUG();
91} 58}
92 59
93
94void 60void
95icmn_err(register int level, char *fmt, va_list ap) 61icmn_err(register int level, char *fmt, va_list ap)
96{ 62{
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
109 if (level == CE_PANIC) 75 if (level == CE_PANIC)
110 BUG(); 76 BUG();
111} 77}
78
79void
80assfail(char *expr, char *file, int line)
81{
82 printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
83 BUG();
84}
85
86#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
87unsigned long random(void)
88{
89 static unsigned long RandomValue = 1;
90 /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
91 register long rv = RandomValue;
92 register long lo;
93 register long hi;
94
95 hi = rv / 127773;
96 lo = rv % 127773;
97 rv = 16807 * lo - 2836 * hi;
98 if (rv <= 0) rv += 2147483647;
99 return RandomValue = rv;
100}
101#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
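The random() helper kept above is the Park-Miller "minimal standard" Lehmer generator, x(n+1) = 16807 * x(n) mod (2^31 - 1), written with Schrage's decomposition (127773 = m / a and 2836 = m % a for a = 16807, m = 2^31 - 1) so the multiply never overflows a signed 32-bit value. A standalone copy checked against the published test value:

#include <stdio.h>

static long seed = 1;

static long minstd(void)
{
	long hi = seed / 127773;
	long lo = seed % 127773;
	long rv = 16807 * lo - 2836 * hi;

	if (rv <= 0)
		rv += 2147483647;
	return seed = rv;
}

int main(void)
{
	long v = 0;
	int i;

	for (i = 0; i < 10000; i++)
		v = minstd();
	/* Park & Miller (1988): starting from seed 1, the 10000th
	 * value must be 1043618065 */
	printf("%ld %s\n", v, v == 1043618065 ? "(matches)" : "(mismatch)");
	return 0;
}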
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index aff558664c32..e3bf58112e7e 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
31 __attribute__ ((format (printf, 2, 0))); 31 __attribute__ ((format (printf, 2, 0)));
32extern void cmn_err(int, char *, ...) 32extern void cmn_err(int, char *, ...)
33 __attribute__ ((format (printf, 2, 3))); 33 __attribute__ ((format (printf, 2, 3)));
34extern void assfail(char *expr, char *f, int l);
34 35
35#ifndef STATIC 36#define prdev(fmt,targ,args...) \
36# define STATIC static 37 printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
37#endif
38 38
39#ifdef DEBUG 39#define ASSERT_ALWAYS(expr) \
40# define ASSERT(EX) ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__)) 40 (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
41#else
42# define ASSERT(x) ((void)0)
43#endif
44 41
45extern void assfail(char *, char *, int); 42#ifndef DEBUG
46#ifdef DEBUG 43# define ASSERT(expr) ((void)0)
44#else
45# define ASSERT(expr) ASSERT_ALWAYS(expr)
47extern unsigned long random(void); 46extern unsigned long random(void);
48extern int get_thread_id(void);
49#endif 47#endif
50 48
51#define ASSERT_ALWAYS(EX) ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__)) 49#ifndef STATIC
52#define debug_stop_all_cpus(param) /* param is "cpumask_t *" */ 50# define STATIC static
51#endif
53 52
54#endif /* __XFS_SUPPORT_DEBUG_H__ */ 53#endif /* __XFS_SUPPORT_DEBUG_H__ */
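With the reshuffled header, ASSERT() compiles to nothing unless DEBUG is defined, while ASSERT_ALWAYS() is checked in every build. A self-contained sketch of the same two macros (assfail() here is a userspace stand-in):

#include <stdio.h>
#include <stdlib.h>

static void assfail(const char *expr, const char *file, int line)
{
	fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
		expr, file, line);
	abort();
}

#define ASSERT_ALWAYS(expr) \
	((expr) != 0 ? (void)0 : assfail(#expr, __FILE__, __LINE__))

#ifndef DEBUG
# define ASSERT(expr)	((void)0)
#else
# define ASSERT(expr)	ASSERT_ALWAYS(expr)
#endif

int main(void)
{
	ASSERT(1 + 1 == 3);		/* no-op unless built with -DDEBUG */
	ASSERT_ALWAYS(1 + 1 == 2);	/* checked in every build */
	puts("ok");
	return 0;
}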
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 69ec4f540c3a..a3d565a67734 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -27,6 +27,16 @@ uuid_init(void)
27 mutex_init(&uuid_monitor); 27 mutex_init(&uuid_monitor);
28} 28}
29 29
30
31/* IRIX interpretation of an uuid_t */
32typedef struct {
33 __be32 uu_timelow;
34 __be16 uu_timemid;
35 __be16 uu_timehi;
36 __be16 uu_clockseq;
37 __be16 uu_node[3];
38} xfs_uu_t;
39
30/* 40/*
31 * uuid_getnodeuniq - obtain the node unique fields of a UUID. 41 * uuid_getnodeuniq - obtain the node unique fields of a UUID.
32 * 42 *
@@ -36,16 +46,11 @@ uuid_init(void)
36void 46void
37uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) 47uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
38{ 48{
39 char *uu = (char *)uuid; 49 xfs_uu_t *uup = (xfs_uu_t *)uuid;
40
41 /* on IRIX, this function assumes big-endian fields within
42 * the uuid, so we use INT_GET to get the same result on
43 * little-endian systems
44 */
45 50
46 fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) + 51 fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
47 INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT); 52 be16_to_cpu(uup->uu_timemid);
48 fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT); 53 fsid[1] = be32_to_cpu(uup->uu_timelow);
49} 54}
50 55
51void 56void
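The rewrite above replaces raw pointer offsets into the UUID with a big-endian struct overlay, so each field is converted at its correct width (note uu_timelow is 32 bits wide, hence be32_to_cpu). A userspace sketch of the same packing, using the standard network byte-order helpers in place of the kernel's be*_to_cpu; types and values here are illustrative:

#include <arpa/inet.h>	/* ntohs/ntohl: big-endian to host */
#include <stdint.h>
#include <stdio.h>

typedef struct {
	uint32_t uu_timelow;	/* stored big-endian, as on disk */
	uint16_t uu_timemid;
	uint16_t uu_timehi;
	uint16_t uu_clockseq;
	uint16_t uu_node[3];
} uu_sketch_t;

static void getnodeuniq(const uu_sketch_t *uup, int fsid[2])
{
	/* same packing as the patch: clockseq in the high half,
	 * timemid in the low half of fsid[0] */
	fsid[0] = (ntohs(uup->uu_clockseq) << 16) | ntohs(uup->uu_timemid);
	fsid[1] = ntohl(uup->uu_timelow);
}

int main(void)
{
	uu_sketch_t uu = { htonl(0x12345678), htons(1), htons(2),
			   htons(3), { 0, 0, 0 } };
	int fsid[2];

	getnodeuniq(&uu, fsid);
	printf("%08x %08x\n", (unsigned)fsid[0], (unsigned)fsid[1]);
	return 0;
}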
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 68e5051d8e24..c4836890b726 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,6 +40,22 @@
40#undef XFS_NATIVE_HOST 40#undef XFS_NATIVE_HOST
41#endif 41#endif
42 42
43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__be16)(val))
45#define cpu_to_be32(val) ((__be32)(val))
46#define cpu_to_be64(val) ((__be64)(val))
47#define be16_to_cpu(val) ((__uint16_t)(val))
48#define be32_to_cpu(val) ((__uint32_t)(val))
49#define be64_to_cpu(val) ((__uint64_t)(val))
50#else
51#define cpu_to_be16(val) (__swab16((__uint16_t)(val)))
52#define cpu_to_be32(val) (__swab32((__uint32_t)(val)))
53#define cpu_to_be64(val) (__swab64((__uint64_t)(val)))
54#define be16_to_cpu(val) (__swab16((__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__be64)(val)))
57#endif
58
43#endif /* __KERNEL__ */ 59#endif /* __KERNEL__ */
44 60
45/* do we need conversion? */ 61/* do we need conversion? */
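On a big-endian (XFS_NATIVE_HOST) machine the new conversion macros above are identity casts; everywhere else they byte-swap via __swab*. The glibc byte-order helpers let the same property be checked in userspace:

#include <endian.h>	/* htobe32/be32toh (glibc) */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t host = 0x11223344;
	uint32_t disk = htobe32(host);	/* plays the role of cpu_to_be32 */

#if __BYTE_ORDER == __BIG_ENDIAN
	/* XFS_NATIVE_HOST case: the conversion is a no-op */
	printf("native host, disk == host: %d\n", disk == host);
#else
	/* little-endian case: the conversion is a byte swap */
	printf("swapped: %08x -> %08x\n", host, disk);
#endif
	printf("round trip ok: %d\n", be32toh(disk) == host);
	return 0;
}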
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
186 */ 202 */
187 203
188#define XFS_GET_DIR_INO4(di) \ 204#define XFS_GET_DIR_INO4(di) \
189 (((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) 205 (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
190 206
191#define XFS_PUT_DIR_INO4(from, di) \ 207#define XFS_PUT_DIR_INO4(from, di) \
192do { \ 208do { \
@@ -197,9 +213,9 @@ do { \
197} while (0) 213} while (0)
198 214
199#define XFS_DI_HI(di) \ 215#define XFS_DI_HI(di) \
200 (((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) 216 (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
201#define XFS_DI_LO(di) \ 217#define XFS_DI_LO(di) \
202 (((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7])) 218 (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
203 219
204#define XFS_GET_DIR_INO8(di) \ 220#define XFS_GET_DIR_INO8(di) \
205 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \ 221 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
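The u32 to __u32 change is cosmetic, but the cast on the first byte is not: without widening, a byte of 0x80 or above promoted to int would be shifted into the sign bit, which is undefined behaviour for signed left shifts. A small demonstration of the byte-packing these macros perform:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned char di[4] = { 0xCA, 0xFE, 0xBA, 0xBE };

	/* widen before shifting, as XFS_GET_DIR_INO4 does */
	uint32_t ino4 = ((uint32_t)di[0] << 24) | (di[1] << 16)
		      | (di[2] << 8) | di[3];
	printf("%08x\n", ino4);	/* cafebabe */
	return 0;
}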
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1c7421840c18..fe91eac4e2a7 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
128 return (offset >= minforkoff) ? minforkoff : 0; 128 return (offset >= minforkoff) ? minforkoff : 0;
129 } 129 }
130 130
131 if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) { 131 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
132 if (bytes <= XFS_IFORK_ASIZE(dp)) 132 if (bytes <= XFS_IFORK_ASIZE(dp))
133 return mp->m_attroffset >> 3; 133 return mp->m_attroffset >> 3;
134 return 0; 134 return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
157{ 157{
158 unsigned long s; 158 unsigned long s;
159 159
160 if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) && 160 if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) { 161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
162 s = XFS_SB_LOCK(mp); 162 s = XFS_SB_LOCK(mp);
163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { 163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
311 */ 311 */
312 totsize -= size; 312 totsize -= size;
313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
314 !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) { 314 (mp->m_flags & XFS_MOUNT_ATTR2)) {
315 /* 315 /*
316 * Last attribute now removed, revert to original 316 * Last attribute now removed, revert to original
317 * inode format making all literal area available 317 * inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
331 ASSERT(dp->i_d.di_forkoff); 331 ASSERT(dp->i_d.di_forkoff);
332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
333 (mp->m_flags & XFS_MOUNT_COMPAT_ATTR)); 333 !(mp->m_flags & XFS_MOUNT_ATTR2));
334 dp->i_afp->if_ext_max = 334 dp->i_afp->if_ext_max =
335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
336 dp->i_df.if_ext_max = 336 dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
739 + name_loc->namelen 739 + name_loc->namelen
740 + INT_GET(name_loc->valuelen, ARCH_CONVERT); 740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);
741 } 741 }
742 if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) && 742 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
743 (bytes == sizeof(struct xfs_attr_sf_hdr))) 743 (bytes == sizeof(struct xfs_attr_sf_hdr)))
744 return(-1); 744 return(-1);
745 return(xfs_attr_shortform_bytesfit(dp, bytes)); 745 return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
778 goto out; 778 goto out;
779 779
780 if (forkoff == -1) { 780 if (forkoff == -1) {
781 ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR)); 781 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
782 782
783 /* 783 /*
784 * Last attribute was removed, revert to original 784 * Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f6143ff251a0..541e34109bb9 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -63,7 +63,7 @@ struct xfs_trans;
63 * the leaf_entry. The namespaces are independent only because we also look 63 * the leaf_entry. The namespaces are independent only because we also look
64 * at the namespace bit when we are looking for a matching attribute name. 64 * at the namespace bit when we are looking for a matching attribute name.
65 * 65 *
66 * We also store a "incomplete" bit in the leaf_entry. It shows that an 66 * We also store an "incomplete" bit in the leaf_entry. It shows that an
67 * attribute is in the middle of being created and should not be shown to 67 * attribute is in the middle of being created and should not be shown to
68 * the user if we crash during the time that the bit is set. We clear the 68 * the user if we crash during the time that the bit is set. We clear the
69 * bit when we have finished setting up the attribute. We do this because 69 * bit when we have finished setting up the attribute. We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
72 */ 72 */
73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ 73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
74 74
75typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
76 __uint16_t base; /* base of free region */
77 __uint16_t size; /* length of free region */
78} xfs_attr_leaf_map_t;
79
80typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
81 xfs_da_blkinfo_t info; /* block type, links, etc. */
82 __uint16_t count; /* count of active leaf_entry's */
83 __uint16_t usedbytes; /* num bytes of names/values stored */
84 __uint16_t firstused; /* first used byte in name area */
85 __uint8_t holes; /* != 0 if blk needs compaction */
86 __uint8_t pad1;
87 xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
88 /* N largest free regions */
89} xfs_attr_leaf_hdr_t;
90
91typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
92 xfs_dahash_t hashval; /* hash value of name */
93 __uint16_t nameidx; /* index into buffer of name/value */
94 __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
95 __uint8_t pad2; /* unused pad byte */
96} xfs_attr_leaf_entry_t;
97
98typedef struct xfs_attr_leaf_name_local {
99 __uint16_t valuelen; /* number of bytes in value */
100 __uint8_t namelen; /* length of name bytes */
101 __uint8_t nameval[1]; /* name/value bytes */
102} xfs_attr_leaf_name_local_t;
103
104typedef struct xfs_attr_leaf_name_remote {
105 xfs_dablk_t valueblk; /* block number of value bytes */
106 __uint32_t valuelen; /* number of bytes in value */
107 __uint8_t namelen; /* length of name bytes */
108 __uint8_t name[1]; /* name bytes */
109} xfs_attr_leaf_name_remote_t;
110
75typedef struct xfs_attr_leafblock { 111typedef struct xfs_attr_leafblock {
76 struct xfs_attr_leaf_hdr { /* constant-structure header block */ 112 xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
77 xfs_da_blkinfo_t info; /* block type, links, etc. */ 113 xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
78 __uint16_t count; /* count of active leaf_entry's */ 114 xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
79 __uint16_t usedbytes; /* num bytes of names/values stored */ 115 xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
80 __uint16_t firstused; /* first used byte in name area */
81 __uint8_t holes; /* != 0 if blk needs compaction */
82 __uint8_t pad1;
83 struct xfs_attr_leaf_map { /* RLE map of free bytes */
84 __uint16_t base; /* base of free region */
85 __uint16_t size; /* length of free region */
86 } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
87 } hdr;
88 struct xfs_attr_leaf_entry { /* sorted on key, not name */
89 xfs_dahash_t hashval; /* hash value of name */
90 __uint16_t nameidx; /* index into buffer of name/value */
91 __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
92 __uint8_t pad2; /* unused pad byte */
93 } entries[1]; /* variable sized array */
94 struct xfs_attr_leaf_name_local {
95 __uint16_t valuelen; /* number of bytes in value */
96 __uint8_t namelen; /* length of name bytes */
97 __uint8_t nameval[1]; /* name/value bytes */
98 } namelist; /* grows from bottom of buf */
99 struct xfs_attr_leaf_name_remote {
100 xfs_dablk_t valueblk; /* block number of value bytes */
101 __uint32_t valuelen; /* number of bytes in value */
102 __uint8_t namelen; /* length of name bytes */
103 __uint8_t name[1]; /* name bytes */
104 } valuelist; /* grows from bottom of buf */
105} xfs_attr_leafblock_t; 116} xfs_attr_leafblock_t;
106typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
107typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
108typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
109typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
110typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
111 117
112/* 118/*
113 * Flags used in the leaf_entry[i].flags field. 119 * Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
150 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]; 156 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
151} 157}
152 158
153#define XFS_ATTR_LEAF_NAME(leafp,idx) xfs_attr_leaf_name(leafp,idx) 159#define XFS_ATTR_LEAF_NAME(leafp,idx) \
160 xfs_attr_leaf_name(leafp,idx)
154static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) 161static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
155{ 162{
156 return (&((char *) 163 return (&((char *)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9c..70625e577c70 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
2146 return 0; /* keep gcc quiet */ 2146 return 0; /* keep gcc quiet */
2147} 2147}
2148 2148
2149/*
2150 * Adjust the size of the new extent based on di_extsize and rt extsize.
2151 */
2152STATIC int
2153xfs_bmap_extsize_align(
2154 xfs_mount_t *mp,
2155 xfs_bmbt_irec_t *gotp, /* next extent pointer */
2156 xfs_bmbt_irec_t *prevp, /* previous extent pointer */
2157 xfs_extlen_t extsz, /* align to this extent size */
2158 int rt, /* is this a realtime inode? */
2159 int eof, /* is extent at end-of-file? */
2160 int delay, /* creating delalloc extent? */
2161 int convert, /* overwriting unwritten extent? */
2162 xfs_fileoff_t *offp, /* in/out: aligned offset */
2163 xfs_extlen_t *lenp) /* in/out: aligned length */
2164{
2165 xfs_fileoff_t orig_off; /* original offset */
2166 xfs_extlen_t orig_alen; /* original length */
2167 xfs_fileoff_t orig_end; /* original off+len */
2168 xfs_fileoff_t nexto; /* next file offset */
2169 xfs_fileoff_t prevo; /* previous file offset */
2170 xfs_fileoff_t align_off; /* temp for offset */
2171 xfs_extlen_t align_alen; /* temp for length */
2172 xfs_extlen_t temp; /* temp for calculations */
2173
2174 if (convert)
2175 return 0;
2176
2177 orig_off = align_off = *offp;
2178 orig_alen = align_alen = *lenp;
2179 orig_end = orig_off + orig_alen;
2180
2181 /*
2182 * If this request overlaps an existing extent, then don't
2183 * attempt to perform any additional alignment.
2184 */
2185 if (!delay && !eof &&
2186 (orig_off >= gotp->br_startoff) &&
2187 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2188 return 0;
2189 }
2190
2191 /*
2192 * If the file offset is unaligned vs. the extent size
2193 * we need to align it. This will be possible unless
2194 * the file was previously written with a kernel that didn't
2195 * perform this alignment, or if a truncate shot us in the
2196 * foot.
2197 */
2198 temp = do_mod(orig_off, extsz);
2199 if (temp) {
2200 align_alen += temp;
2201 align_off -= temp;
2202 }
2203 /*
2204 * Same adjustment for the end of the requested area.
2205 */
2206 if ((temp = (align_alen % extsz))) {
2207 align_alen += extsz - temp;
2208 }
2209 /*
2210 * If the previous block overlaps with this proposed allocation
2211 * then move the start forward without adjusting the length.
2212 */
2213 if (prevp->br_startoff != NULLFILEOFF) {
2214 if (prevp->br_startblock == HOLESTARTBLOCK)
2215 prevo = prevp->br_startoff;
2216 else
2217 prevo = prevp->br_startoff + prevp->br_blockcount;
2218 } else
2219 prevo = 0;
2220 if (align_off != orig_off && align_off < prevo)
2221 align_off = prevo;
2222 /*
2223 * If the next block overlaps with this proposed allocation
2224 * then move the start back without adjusting the length,
2225 * but not before offset 0.
2226 * This may of course make the start overlap the previous block,
2227 * and if we hit the offset 0 limit then the next block
2228 * can still overlap too.
2229 */
2230 if (!eof && gotp->br_startoff != NULLFILEOFF) {
2231 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2232 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2233 nexto = gotp->br_startoff + gotp->br_blockcount;
2234 else
2235 nexto = gotp->br_startoff;
2236 } else
2237 nexto = NULLFILEOFF;
2238 if (!eof &&
2239 align_off + align_alen != orig_end &&
2240 align_off + align_alen > nexto)
2241 align_off = nexto > align_alen ? nexto - align_alen : 0;
2242 /*
2243 * If we're now overlapping the next or previous extent that
2244 * means we can't fit an extsz piece in this hole. Just move
2245 * the start forward to the first valid spot and set
2246 * the length so we hit the end.
2247 */
2248 if (align_off != orig_off && align_off < prevo)
2249 align_off = prevo;
2250 if (align_off + align_alen != orig_end &&
2251 align_off + align_alen > nexto &&
2252 nexto != NULLFILEOFF) {
2253 ASSERT(nexto > prevo);
2254 align_alen = nexto - align_off;
2255 }
2256
2257 /*
2258 * If realtime, and the result isn't a multiple of the realtime
2259 * extent size we need to remove blocks until it is.
2260 */
2261 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2262 /*
2263 * We're not covering the original request, or
2264 * we won't be able to once we fix the length.
2265 */
2266 if (orig_off < align_off ||
2267 orig_end > align_off + align_alen ||
2268 align_alen - temp < orig_alen)
2269 return XFS_ERROR(EINVAL);
2270 /*
2271 * Try to fix it by moving the start up.
2272 */
2273 if (align_off + temp <= orig_off) {
2274 align_alen -= temp;
2275 align_off += temp;
2276 }
2277 /*
2278 * Try to fix it by moving the end in.
2279 */
2280 else if (align_off + align_alen - temp >= orig_end)
2281 align_alen -= temp;
2282 /*
2283 * Set the start to the minimum then trim the length.
2284 */
2285 else {
2286 align_alen -= orig_off - align_off;
2287 align_off = orig_off;
2288 align_alen -= align_alen % mp->m_sb.sb_rextsize;
2289 }
2290 /*
2291 * Result doesn't cover the request, fail it.
2292 */
2293 if (orig_off < align_off || orig_end > align_off + align_alen)
2294 return XFS_ERROR(EINVAL);
2295 } else {
2296 ASSERT(orig_off >= align_off);
2297 ASSERT(orig_end <= align_off + align_alen);
2298 }
2299
2300#ifdef DEBUG
2301 if (!eof && gotp->br_startoff != NULLFILEOFF)
2302 ASSERT(align_off + align_alen <= gotp->br_startoff);
2303 if (prevp->br_startoff != NULLFILEOFF)
2304 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
2305#endif
2306
2307 *lenp = align_alen;
2308 *offp = align_off;
2309 return 0;
2310}
2311
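Since xfs_bmap_extsize_align() is new here, a worked example of its opening moves helps: the start is rounded down and the length rounded up so the request covers whole extent-size units. Hypothetical numbers, offsets in filesystem blocks:

#include <stdio.h>

int main(void)
{
	unsigned long long off = 21, len = 10, extsz = 16;
	unsigned long long temp;

	temp = off % extsz;		/* do_mod(orig_off, extsz) */
	if (temp) {
		len += temp;		/* grow the request backwards... */
		off -= temp;		/* ...so the start hits a boundary */
	}
	temp = len % extsz;
	if (temp)
		len += extsz - temp;	/* and round the end up too */

	/* the aligned [16, 32) range covers the original [21, 31) */
	printf("aligned: off=%llu len=%llu\n", off, len);	/* 16, 16 */
	return 0;
}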
2149#define XFS_ALLOC_GAP_UNITS 4 2312#define XFS_ALLOC_GAP_UNITS 4
2150 2313
2151/* 2314/*
2152 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 2315 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
2153 * It figures out where to ask the underlying allocator to put the new extent. 2316 * It figures out where to ask the underlying allocator to put the new extent.
2154 */ 2317 */
2155STATIC int /* error */ 2318STATIC int
2156xfs_bmap_alloc( 2319xfs_bmap_alloc(
2157 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2320 xfs_bmalloca_t *ap) /* bmap alloc argument struct */
2158{ 2321{
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
2163 xfs_mount_t *mp; /* mount point structure */ 2326 xfs_mount_t *mp; /* mount point structure */
2164 int nullfb; /* true if ap->firstblock isn't set */ 2327 int nullfb; /* true if ap->firstblock isn't set */
2165 int rt; /* true if inode is realtime */ 2328 int rt; /* true if inode is realtime */
2166#ifdef __KERNEL__ 2329 xfs_extlen_t prod = 0; /* product factor for allocators */
2167 xfs_extlen_t prod=0; /* product factor for allocators */ 2330 xfs_extlen_t ralen = 0; /* realtime allocation length */
2168 xfs_extlen_t ralen=0; /* realtime allocation length */ 2331 xfs_extlen_t align; /* minimum allocation alignment */
2169#endif 2332 xfs_rtblock_t rtx;
2170 2333
2171#define ISVALID(x,y) \ 2334#define ISVALID(x,y) \
2172 (rt ? \ 2335 (rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
2182 nullfb = ap->firstblock == NULLFSBLOCK; 2345 nullfb = ap->firstblock == NULLFSBLOCK;
2183 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; 2346 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
2184 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); 2347 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2185#ifdef __KERNEL__
2186 if (rt) { 2348 if (rt) {
2187 xfs_extlen_t extsz; /* file extent size for rt */ 2349 align = ap->ip->i_d.di_extsize ?
2188 xfs_fileoff_t nexto; /* next file offset */ 2350 ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
2189 xfs_extlen_t orig_alen; /* original ap->alen */ 2351 /* Set prod to match the extent size */
2190 xfs_fileoff_t orig_end; /* original off+len */ 2352 prod = align / mp->m_sb.sb_rextsize;
2191 xfs_fileoff_t orig_off; /* original ap->off */ 2353
2192 xfs_extlen_t mod_off; /* modulus calculations */ 2354 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2193 xfs_fileoff_t prevo; /* previous file offset */ 2355 align, rt, ap->eof, 0,
2194 xfs_rtblock_t rtx; /* realtime extent number */ 2356 ap->conv, &ap->off, &ap->alen);
2195 xfs_extlen_t temp; /* temp for rt calculations */ 2357 if (error)
2196 2358 return error;
2197 /* 2359 ASSERT(ap->alen);
2198 * Set prod to match the realtime extent size.
2199 */
2200 if (!(extsz = ap->ip->i_d.di_extsize))
2201 extsz = mp->m_sb.sb_rextsize;
2202 prod = extsz / mp->m_sb.sb_rextsize;
2203 orig_off = ap->off;
2204 orig_alen = ap->alen;
2205 orig_end = orig_off + orig_alen;
2206 /*
2207 * If the file offset is unaligned vs. the extent size
2208 * we need to align it. This will be possible unless
2209 * the file was previously written with a kernel that didn't
2210 * perform this alignment.
2211 */
2212 mod_off = do_mod(orig_off, extsz);
2213 if (mod_off) {
2214 ap->alen += mod_off;
2215 ap->off -= mod_off;
2216 }
2217 /*
2218 * Same adjustment for the end of the requested area.
2219 */
2220 if ((temp = (ap->alen % extsz)))
2221 ap->alen += extsz - temp;
2222 /*
2223 * If the previous block overlaps with this proposed allocation
2224 * then move the start forward without adjusting the length.
2225 */
2226 prevo =
2227 ap->prevp->br_startoff == NULLFILEOFF ?
2228 0 :
2229 (ap->prevp->br_startoff +
2230 ap->prevp->br_blockcount);
2231 if (ap->off != orig_off && ap->off < prevo)
2232 ap->off = prevo;
2233 /*
2234 * If the next block overlaps with this proposed allocation
2235 * then move the start back without adjusting the length,
2236 * but not before offset 0.
2237 * This may of course make the start overlap the previous block,
2238 * and if we hit the offset 0 limit then the next block
2239 * can still overlap too.
2240 */
2241 nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
2242 NULLFILEOFF : ap->gotp->br_startoff;
2243 if (!ap->eof &&
2244 ap->off + ap->alen != orig_end &&
2245 ap->off + ap->alen > nexto)
2246 ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
2247 /*
2248 * If we're now overlapping the next or previous extent that
2249 * means we can't fit an extsz piece in this hole. Just move
2250 * the start forward to the first valid spot and set
2251 * the length so we hit the end.
2252 */
2253 if ((ap->off != orig_off && ap->off < prevo) ||
2254 (ap->off + ap->alen != orig_end &&
2255 ap->off + ap->alen > nexto)) {
2256 ap->off = prevo;
2257 ap->alen = nexto - prevo;
2258 }
2259 /*
2260 * If the result isn't a multiple of rtextents we need to
2261 * remove blocks until it is.
2262 */
2263 if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
2264 /*
2265 * We're not covering the original request, or
2266 * we won't be able to once we fix the length.
2267 */
2268 if (orig_off < ap->off ||
2269 orig_end > ap->off + ap->alen ||
2270 ap->alen - temp < orig_alen)
2271 return XFS_ERROR(EINVAL);
2272 /*
2273 * Try to fix it by moving the start up.
2274 */
2275 if (ap->off + temp <= orig_off) {
2276 ap->alen -= temp;
2277 ap->off += temp;
2278 }
2279 /*
2280 * Try to fix it by moving the end in.
2281 */
2282 else if (ap->off + ap->alen - temp >= orig_end)
2283 ap->alen -= temp;
2284 /*
2285 * Set the start to the minimum then trim the length.
2286 */
2287 else {
2288 ap->alen -= orig_off - ap->off;
2289 ap->off = orig_off;
2290 ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
2291 }
2292 /*
2293 * Result doesn't cover the request, fail it.
2294 */
2295 if (orig_off < ap->off || orig_end > ap->off + ap->alen)
2296 return XFS_ERROR(EINVAL);
2297 }
2298 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); 2360 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
2361
2299 /* 2362 /*
2300 * If the offset & length are not perfectly aligned 2363 * If the offset & length are not perfectly aligned
2301 * then kill prod; it will just get us in trouble. 2364 * then kill prod; it will just get us in trouble.
2302 */ 2365 */
2303 if (do_mod(ap->off, extsz) || ap->alen % extsz) 2366 if (do_mod(ap->off, align) || ap->alen % align)
2304 prod = 1; 2367 prod = 1;
2305 /* 2368 /*
2306 * Set ralen to be the actual requested length in rtextents. 2369 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
2326 ap->rval = rtx * mp->m_sb.sb_rextsize; 2389 ap->rval = rtx * mp->m_sb.sb_rextsize;
2327 } else 2390 } else
2328 ap->rval = 0; 2391 ap->rval = 0;
2392 } else {
2393 align = (ap->userdata && ap->ip->i_d.di_extsize &&
2394 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
2395 ap->ip->i_d.di_extsize : 0;
2396 if (unlikely(align)) {
2397 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2398 align, rt,
2399 ap->eof, 0, ap->conv,
2400 &ap->off, &ap->alen);
2401 ASSERT(!error);
2402 ASSERT(ap->alen);
2403 }
2404 if (nullfb)
2405 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2406 else
2407 ap->rval = ap->firstblock;
2329 } 2408 }
2330#else 2409
2331 if (rt)
2332 ap->rval = 0;
2333#endif /* __KERNEL__ */
2334 else if (nullfb)
2335 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2336 else
2337 ap->rval = ap->firstblock;
2338 /* 2410 /*
2339 * If allocating at eof, and there's a previous real block, 2411 * If allocating at eof, and there's a previous real block,
2340 * try to use its last block as our starting point. 2412 * try to use its last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
2598 args.total = ap->total; 2670 args.total = ap->total;
2599 args.minlen = ap->minlen; 2671 args.minlen = ap->minlen;
2600 } 2672 }
2601 if (ap->ip->i_d.di_extsize) { 2673 if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
2674 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
2602 args.prod = ap->ip->i_d.di_extsize; 2675 args.prod = ap->ip->i_d.di_extsize;
2603 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) 2676 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2604 args.mod = (xfs_extlen_t)(args.prod - args.mod); 2677 args.mod = (xfs_extlen_t)(args.prod - args.mod);
2605 } else if (mp->m_sb.sb_blocksize >= NBPP) { 2678 } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
2606 args.prod = 1; 2679 args.prod = 1;
2607 args.mod = 0; 2680 args.mod = 0;
2608 } else { 2681 } else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
3580 3653
3581 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp, 3654 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
3582 lastxp, gotp, prevp); 3655 lastxp, gotp, prevp);
3583 rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME; 3656 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
3584 if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) { 3657 if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
3585 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " 3658 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
3586 "start_block : %llx start_off : %llx blkcnt : %llx " 3659 "start_block : %llx start_off : %llx blkcnt : %llx "
3587 "extent-state : %x \n", 3660 "extent-state : %x \n",
3588 (ip->i_mount)->m_fsname,(long long)ip->i_ino, 3661 (ip->i_mount)->m_fsname, (long long)ip->i_ino,
3589 gotp->br_startblock, gotp->br_startoff, 3662 (unsigned long long)gotp->br_startblock,
3590 gotp->br_blockcount,gotp->br_state); 3663 (unsigned long long)gotp->br_startoff,
3664 (unsigned long long)gotp->br_blockcount,
3665 gotp->br_state);
3591 } 3666 }
3592 return ep; 3667 return ep;
3593} 3668}
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
3875 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); 3950 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
3876 if (!ip->i_d.di_forkoff) 3951 if (!ip->i_d.di_forkoff)
3877 ip->i_d.di_forkoff = mp->m_attroffset >> 3; 3952 ip->i_d.di_forkoff = mp->m_attroffset >> 3;
3878 else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) 3953 else if (mp->m_flags & XFS_MOUNT_ATTR2)
3879 version = 2; 3954 version = 2;
3880 break; 3955 break;
3881 default: 3956 default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
4023 */ 4098 */
4024 if (whichfork == XFS_DATA_FORK) { 4099 if (whichfork == XFS_DATA_FORK) {
4025 maxleafents = MAXEXTNUM; 4100 maxleafents = MAXEXTNUM;
4026 sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ? 4101 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
4027 mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS); 4102 XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
4028 } else { 4103 } else {
4029 maxleafents = MAXAEXTNUM; 4104 maxleafents = MAXAEXTNUM;
4030 sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ? 4105 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
4031 mp->m_sb.sb_inodesize - mp->m_attroffset : 4106 XFS_BMDR_SPACE_CALC(MINABTPTRS) :
4032 XFS_BMDR_SPACE_CALC(MINABTPTRS); 4107 mp->m_sb.sb_inodesize - mp->m_attroffset;
4033 } 4108 }
4034 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4109 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
4035 minleafrecs = mp->m_bmap_dmnr[0]; 4110 minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
4418 num_recs = be16_to_cpu(block->bb_numrecs); 4493 num_recs = be16_to_cpu(block->bb_numrecs);
4419 if (unlikely(i + num_recs > room)) { 4494 if (unlikely(i + num_recs > room)) {
4420 ASSERT(i + num_recs <= room); 4495 ASSERT(i + num_recs <= room);
4421 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 4496 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
4422 "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.", 4497 "corrupt dinode %Lu, (btree extents).",
4423 (unsigned long long) ip->i_ino); 4498 (unsigned long long) ip->i_ino);
4424 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", 4499 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
4425 XFS_ERRLEVEL_LOW, 4500 XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
4590 char contig; /* allocation must be one extent */ 4665 char contig; /* allocation must be one extent */
4591 char delay; /* this request is for delayed alloc */ 4666 char delay; /* this request is for delayed alloc */
4592 char exact; /* don't do all of wasdelayed extent */ 4667 char exact; /* don't do all of wasdelayed extent */
4668 char convert; /* unwritten extent I/O completion */
4593 xfs_bmbt_rec_t *ep; /* extent list entry pointer */ 4669 xfs_bmbt_rec_t *ep; /* extent list entry pointer */
4594 int error; /* error return */ 4670 int error; /* error return */
4595 xfs_bmbt_irec_t got; /* current extent list record */ 4671 xfs_bmbt_irec_t got; /* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
4643 } 4719 }
4644 if (XFS_FORCED_SHUTDOWN(mp)) 4720 if (XFS_FORCED_SHUTDOWN(mp))
4645 return XFS_ERROR(EIO); 4721 return XFS_ERROR(EIO);
4646 rt = XFS_IS_REALTIME_INODE(ip); 4722 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4647 ifp = XFS_IFORK_PTR(ip, whichfork); 4723 ifp = XFS_IFORK_PTR(ip, whichfork);
4648 ASSERT(ifp->if_ext_max == 4724 ASSERT(ifp->if_ext_max ==
4649 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 4725 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
4654 delay = (flags & XFS_BMAPI_DELAY) != 0; 4730 delay = (flags & XFS_BMAPI_DELAY) != 0;
4655 trim = (flags & XFS_BMAPI_ENTIRE) == 0; 4731 trim = (flags & XFS_BMAPI_ENTIRE) == 0;
4656 userdata = (flags & XFS_BMAPI_METADATA) == 0; 4732 userdata = (flags & XFS_BMAPI_METADATA) == 0;
4733 convert = (flags & XFS_BMAPI_CONVERT) != 0;
4657 exact = (flags & XFS_BMAPI_EXACT) != 0; 4734 exact = (flags & XFS_BMAPI_EXACT) != 0;
4658 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; 4735 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
4659 contig = (flags & XFS_BMAPI_CONTIG) != 0; 4736 contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
4748 } 4825 }
4749 minlen = contig ? alen : 1; 4826 minlen = contig ? alen : 1;
4750 if (delay) { 4827 if (delay) {
4751 xfs_extlen_t extsz = 0; 4828 xfs_extlen_t extsz;
4752 4829
4753 /* Figure out the extent size, adjust alen */ 4830 /* Figure out the extent size, adjust alen */
4754 if (rt) { 4831 if (rt) {
4755 if (!(extsz = ip->i_d.di_extsize)) 4832 if (!(extsz = ip->i_d.di_extsize))
4756 extsz = mp->m_sb.sb_rextsize; 4833 extsz = mp->m_sb.sb_rextsize;
4757 alen = roundup(alen, extsz); 4834 } else {
4758 extsz = alen / mp->m_sb.sb_rextsize; 4835 extsz = ip->i_d.di_extsize;
4759 } 4836 }
4837 if (extsz) {
4838 error = xfs_bmap_extsize_align(mp,
4839 &got, &prev, extsz,
4840 rt, eof, delay, convert,
4841 &aoff, &alen);
4842 ASSERT(!error);
4843 }
4844
4845 if (rt)
4846 extsz = alen / mp->m_sb.sb_rextsize;
4760 4847
4761 /* 4848 /*
4762 * Make a transaction-less quota reservation for 4849 * Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
4785 xfs_bmap_worst_indlen(ip, alen); 4872 xfs_bmap_worst_indlen(ip, alen);
4786 ASSERT(indlen > 0); 4873 ASSERT(indlen > 0);
4787 4874
4788 if (rt) 4875 if (rt) {
4789 error = xfs_mod_incore_sb(mp, 4876 error = xfs_mod_incore_sb(mp,
4790 XFS_SBS_FREXTENTS, 4877 XFS_SBS_FREXTENTS,
4791 -(extsz), rsvd); 4878 -(extsz), rsvd);
4792 else 4879 } else {
4793 error = xfs_mod_incore_sb(mp, 4880 error = xfs_mod_incore_sb(mp,
4794 XFS_SBS_FDBLOCKS, 4881 XFS_SBS_FDBLOCKS,
4795 -(alen), rsvd); 4882 -(alen), rsvd);
4883 }
4796 if (!error) { 4884 if (!error) {
4797 error = xfs_mod_incore_sb(mp, 4885 error = xfs_mod_incore_sb(mp,
4798 XFS_SBS_FDBLOCKS, 4886 XFS_SBS_FDBLOCKS,
4799 -(indlen), rsvd); 4887 -(indlen), rsvd);
4800 if (error && rt) { 4888 if (error && rt)
4801 xfs_mod_incore_sb(ip->i_mount, 4889 xfs_mod_incore_sb(mp,
4802 XFS_SBS_FREXTENTS, 4890 XFS_SBS_FREXTENTS,
4803 extsz, rsvd); 4891 extsz, rsvd);
4804 } else if (error) { 4892 else if (error)
4805 xfs_mod_incore_sb(ip->i_mount, 4893 xfs_mod_incore_sb(mp,
4806 XFS_SBS_FDBLOCKS, 4894 XFS_SBS_FDBLOCKS,
4807 alen, rsvd); 4895 alen, rsvd);
4808 }
4809 } 4896 }
4810 4897
4811 if (error) { 4898 if (error) {
4812 if (XFS_IS_QUOTA_ON(ip->i_mount)) 4899 if (XFS_IS_QUOTA_ON(mp))
4813 /* unreserve the blocks now */ 4900 /* unreserve the blocks now */
4901 (void)
4814 XFS_TRANS_UNRESERVE_QUOTA_NBLKS( 4902 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
4815 mp, NULL, ip, 4903 mp, NULL, ip,
4816 (long)alen, 0, rt ? 4904 (long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
4849 bma.firstblock = *firstblock; 4937 bma.firstblock = *firstblock;
4850 bma.alen = alen; 4938 bma.alen = alen;
4851 bma.off = aoff; 4939 bma.off = aoff;
4940 bma.conv = convert;
4852 bma.wasdel = wasdelay; 4941 bma.wasdel = wasdelay;
4853 bma.minlen = minlen; 4942 bma.minlen = minlen;
4854 bma.low = flist->xbf_low; 4943 bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
5270 return 0; 5359 return 0;
5271 } 5360 }
5272 XFS_STATS_INC(xs_blk_unmap); 5361 XFS_STATS_INC(xs_blk_unmap);
5273 isrt = (whichfork == XFS_DATA_FORK) && 5362 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5274 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
5275 start = bno; 5363 start = bno;
5276 bno = start + len - 1; 5364 bno = start + len - 1;
5277 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5365 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
5443 } 5531 }
5444 if (wasdel) { 5532 if (wasdel) {
5445 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0); 5533 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
5446 /* Update realtim/data freespace, unreserve quota */ 5534 /* Update realtime/data freespace, unreserve quota */
5447 if (isrt) { 5535 if (isrt) {
5448 xfs_filblks_t rtexts; 5536 xfs_filblks_t rtexts;
5449 5537
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
5451 do_div(rtexts, mp->m_sb.sb_rextsize); 5539 do_div(rtexts, mp->m_sb.sb_rextsize);
5452 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5540 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5453 (int)rtexts, rsvd); 5541 (int)rtexts, rsvd);
5454 XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, 5542 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5455 -((long)del.br_blockcount), 0, 5543 NULL, ip, -((long)del.br_blockcount), 0,
5456 XFS_QMOPT_RES_RTBLKS); 5544 XFS_QMOPT_RES_RTBLKS);
5457 } else { 5545 } else {
5458 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5546 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
5459 (int)del.br_blockcount, rsvd); 5547 (int)del.br_blockcount, rsvd);
5460 XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, 5548 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5461 -((long)del.br_blockcount), 0, 5549 NULL, ip, -((long)del.br_blockcount), 0,
5462 XFS_QMOPT_RES_REGBLKS); 5550 XFS_QMOPT_RES_REGBLKS);
5463 } 5551 }
5464 ip->i_delayed_blks -= del.br_blockcount; 5552 ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
5652 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5740 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5653 return XFS_ERROR(EINVAL); 5741 return XFS_ERROR(EINVAL);
5654 if (whichfork == XFS_DATA_FORK) { 5742 if (whichfork == XFS_DATA_FORK) {
5655 if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) { 5743 if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
5744 (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
5745 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5656 prealloced = 1; 5746 prealloced = 1;
5657 fixlen = XFS_MAXIOFFSET(mp); 5747 fixlen = XFS_MAXIOFFSET(mp);
5658 } else { 5748 } else {
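
The hunks above route both realtime and EXTSIZE-hinted allocations through the new xfs_bmap_extsize_align() helper. As a reading aid, the rounding it performs can be modelled in a few lines of standalone C. This is a simplified sketch only: the kernel routine also clips the result against the previous/next extents and handles the EOF/convert cases, and every name below is illustrative.

#include <assert.h>
#include <stdio.h>

typedef unsigned long long fsblock_t;

/*
 * Simplified model of xfs_bmap_extsize_align(): widen an (offset, length)
 * request so both ends fall on "extsz" boundaries.  The kernel function
 * additionally trims the result against neighbouring extents.
 */
static void extsize_align(fsblock_t extsz, fsblock_t *off, fsblock_t *len)
{
	fsblock_t end = *off + *len;

	*off -= *off % extsz;			/* round start down */
	if (end % extsz)
		end += extsz - (end % extsz);	/* round end up */
	*len = end - *off;
}

int main(void)
{
	fsblock_t off = 13, len = 10;	/* request covers blocks 13..22 */

	extsize_align(8, &off, &len);	/* widened to blocks 8..23 */
	printf("off=%llu len=%llu\n", off, len);
	assert(off == 8 && len == 16);
	return 0;
}
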
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a01309..12cc63dfc2c4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef struct xfs_bmap_free
62#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */ 62#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
63 /* combine contig. space */ 63 /* combine contig. space */
64#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ 64#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */
65/* XFS_BMAPI_DIRECT_IO 0x800 */
66#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
67 /* need write cache flushing and no */
68 /* additional allocation alignments */
65 69
66#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w) 70#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w)
67static inline int xfs_bmapi_aflag(int w) 71static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
101 char wasdel; /* replacing a delayed allocation */ 105 char wasdel; /* replacing a delayed allocation */
102 char userdata;/* set if is user data */ 106 char userdata;/* set if is user data */
103 char low; /* low on space, using seq'l ags */ 107 char low; /* low on space, using seq'l ags */
104 char aeof; /* allocated space at eof */ 108 char aeof; /* allocated space at eof */
109 char conv; /* overwriting unwritten extents */
105} xfs_bmalloca_t; 110} xfs_bmalloca_t;
106 111
107#ifdef __KERNEL__ 112#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 328a528b926d..f57cc9ac875e 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -57,7 +57,7 @@ struct xfs_mount_args {
57/* 57/*
58 * XFS mount option flags -- args->flags1 58 * XFS mount option flags -- args->flags1
59 */ 59 */
60#define XFSMNT_COMPAT_ATTR 0x00000001 /* do not use ATTR2 format */ 60#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount 61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
62 * compatible */ 62 * compatible */
63#define XFSMNT_INO64 0x00000004 /* move inode numbers up 63#define XFSMNT_INO64 0x00000004 /* move inode numbers up
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 070259a4254c..c6191d00ad27 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -60,8 +60,6 @@ xfs_swapext(
60 xfs_bstat_t *sbp; 60 xfs_bstat_t *sbp;
61 struct file *fp = NULL, *tfp = NULL; 61 struct file *fp = NULL, *tfp = NULL;
62 vnode_t *vp, *tvp; 62 vnode_t *vp, *tvp;
63 bhv_desc_t *bdp, *tbdp;
64 vn_bhv_head_t *bhp, *tbhp;
65 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 63 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
66 int ilf_fields, tilf_fields; 64 int ilf_fields, tilf_fields;
67 int error = 0; 65 int error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
90 goto error0; 88 goto error0;
91 } 89 }
92 90
93 bhp = VN_BHV_HEAD(vp); 91 ip = xfs_vtoi(vp);
94 bdp = vn_bhv_lookup(bhp, &xfs_vnodeops); 92 if (ip == NULL) {
95 if (bdp == NULL) {
96 error = XFS_ERROR(EBADF); 93 error = XFS_ERROR(EBADF);
97 goto error0; 94 goto error0;
98 } else {
99 ip = XFS_BHVTOI(bdp);
100 } 95 }
101 96
102 if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || 97 if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
105 goto error0; 100 goto error0;
106 } 101 }
107 102
108 tbhp = VN_BHV_HEAD(tvp); 103 tip = xfs_vtoi(tvp);
109 tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops); 104 if (tip == NULL) {
110 if (tbdp == NULL) {
111 error = XFS_ERROR(EBADF); 105 error = XFS_ERROR(EBADF);
112 goto error0; 106 goto error0;
113 } else {
114 tip = XFS_BHVTOI(tbdp);
115 } 107 }
116 108
117 if (ip->i_mount != tip->i_mount) { 109 if (ip->i_mount != tip->i_mount) {
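
Both open-coded behavior-chain lookups above collapse into the new xfs_vtoi() helper (declared in xfs_inode.h later in this series); its body is not part of this section. A hedged reconstruction of what it plausibly wraps, assembled only from the calls the removed code used:

/*
 * Hypothetical reconstruction -- not taken from this diff.  The helper
 * centralizes the behavior-chain lookup the callers used to open-code,
 * returning NULL when the vnode carries no XFS behavior descriptor.
 */
xfs_inode_t *
xfs_vtoi(
	struct vnode	*vp)
{
	bhv_desc_t	*bdp;

	bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
	if (unlikely(bdp == NULL))
		return NULL;
	return XFS_BHVTOI(bdp);
}
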
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1a..79d0d9e1fbab 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
199 199
200#define XFS_DFORK_DSIZE(dip,mp) \ 200#define XFS_DFORK_DSIZE(dip,mp) \
201 XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp) 201 XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
202#define XFS_DFORK_DSIZE_HOST(dip,mp) \
203 XFS_CFORK_DSIZE(&(dip)->di_core, mp)
202#define XFS_DFORK_ASIZE(dip,mp) \ 204#define XFS_DFORK_ASIZE(dip,mp) \
203 XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp) 205 XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
206#define XFS_DFORK_ASIZE_HOST(dip,mp) \
207 XFS_CFORK_ASIZE(&(dip)->di_core, mp)
204#define XFS_DFORK_SIZE(dip,mp,w) \ 208#define XFS_DFORK_SIZE(dip,mp,w) \
205 XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w) 209 XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
210#define XFS_DFORK_SIZE_HOST(dip,mp,w) \
211 XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
206 212
207#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core) 213#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core)
208#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core) 214#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
216#define XFS_CFORK_FMT_SET(dcp,w,n) \ 222#define XFS_CFORK_FMT_SET(dcp,w,n) \
217 ((w) == XFS_DATA_FORK ? \ 223 ((w) == XFS_DATA_FORK ? \
218 ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n))) 224 ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
225#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
219 226
220#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \ 227#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \
221 ((w) == XFS_DATA_FORK ? \ 228 ((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
223 INT_GET((dcp)->di_anextents, ARCH_CONVERT)) 230 INT_GET((dcp)->di_anextents, ARCH_CONVERT))
224#define XFS_CFORK_NEXTENTS(dcp,w) \ 231#define XFS_CFORK_NEXTENTS(dcp,w) \
225 ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents) 232 ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
233#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
234#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
226 235
227#define XFS_CFORK_NEXT_SET(dcp,w,n) \ 236#define XFS_CFORK_NEXT_SET(dcp,w,n) \
228 ((w) == XFS_DATA_FORK ? \ 237 ((w) == XFS_DATA_FORK ? \
229 ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n))) 238 ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
230 239
231#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
232
233#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) 240#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
234 241
235/* 242/*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
246#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */ 253#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
247#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */ 254#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
248#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ 255#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */
249#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ 256#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
250#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ 257#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
258#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
259#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
251#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
252#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
253#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) 262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
259#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) 268#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT)
260#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) 269#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT)
261#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) 270#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
262 273
263#define XFS_DIFLAG_ANY \ 274#define XFS_DIFLAG_ANY \
264 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ 275 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
265 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ 276 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
266 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ 277 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
267 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS) 278 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
279 XFS_DIFLAG_EXTSZINHERIT)
268 280
269#endif /* __XFS_DINODE_H__ */ 281#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 3dd30391f551..bb87d2a700a9 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
176 uint shortcount, leafcount, count; 176 uint shortcount, leafcount, count;
177 177
178 mp->m_dirversion = 1; 178 mp->m_dirversion = 1;
179 if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) { 179 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
180 shortcount = (mp->m_attroffset - 180 shortcount = (mp->m_attroffset -
181 (uint)sizeof(xfs_dir_sf_hdr_t)) / 181 (uint)sizeof(xfs_dir_sf_hdr_t)) /
182 (uint)sizeof(xfs_dir_sf_entry_t); 182 (uint)sizeof(xfs_dir_sf_entry_t);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 488defe86ba6..8cc8afb9f6c0 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -135,6 +135,8 @@ void xfs_dir_startup(void); /* called exactly once */
135 ((mp)->m_dirops.xd_shortform_to_single(args)) 135 ((mp)->m_dirops.xd_shortform_to_single(args))
136 136
137#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1) 137#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1)
138#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
138extern xfs_dirops_t xfsv1_dirops; 139extern xfs_dirops_t xfsv1_dirops;
140extern xfs_dirops_t xfsv2_dirops;
139 141
140#endif /* __XFS_DIR_H__ */ 142#endif /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7e24ffeda9e1..3158f5dc431f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
72 struct uio *uio; /* uio control structure */ 72 struct uio *uio; /* uio control structure */
73} xfs_dir2_put_args_t; 73} xfs_dir2_put_args_t;
74 74
75#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
76extern xfs_dirops_t xfsv2_dirops;
77
78/* 75/*
79 * Other interfaces used by the rest of the dir v2 code. 76 * Other interfaces used by the rest of the dir v2 code.
80 */ 77 */
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index ab6b09eef9ab..eb8cd9a4667f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -67,34 +67,38 @@ struct xfs_trans;
67 */ 67 */
68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */ 68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */
69 69
70typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */
71 __uint16_t base; /* base of free region */
72 __uint16_t size; /* run length of free region */
73} xfs_dir_leaf_map_t;
74
75typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */
76 xfs_da_blkinfo_t info; /* block type, links, etc. */
77 __uint16_t count; /* count of active leaf_entry's */
78 __uint16_t namebytes; /* num bytes of name strings stored */
79 __uint16_t firstused; /* first used byte in name area */
80 __uint8_t holes; /* != 0 if blk needs compaction */
81 __uint8_t pad1;
82 xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];
83} xfs_dir_leaf_hdr_t;
84
85typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */
86 xfs_dahash_t hashval; /* hash value of name */
87 __uint16_t nameidx; /* index into buffer of name */
88 __uint8_t namelen; /* length of name string */
89 __uint8_t pad2;
90} xfs_dir_leaf_entry_t;
91
92typedef struct xfs_dir_leaf_name {
93 xfs_dir_ino_t inumber; /* inode number for this key */
94 __uint8_t name[1]; /* name string itself */
95} xfs_dir_leaf_name_t;
96
70typedef struct xfs_dir_leafblock { 97typedef struct xfs_dir_leafblock {
71 struct xfs_dir_leaf_hdr { /* constant-structure header block */ 98 xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */
72 xfs_da_blkinfo_t info; /* block type, links, etc. */ 99 xfs_dir_leaf_entry_t entries[1]; /* var sized array */
73 __uint16_t count; /* count of active leaf_entry's */ 100 xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */
74 __uint16_t namebytes; /* num bytes of name strings stored */
75 __uint16_t firstused; /* first used byte in name area */
76 __uint8_t holes; /* != 0 if blk needs compaction */
77 __uint8_t pad1;
78 struct xfs_dir_leaf_map {/* RLE map of free bytes */
79 __uint16_t base; /* base of free region */
80 __uint16_t size; /* run length of free region */
81 } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
82 } hdr;
83 struct xfs_dir_leaf_entry { /* sorted on key, not name */
84 xfs_dahash_t hashval; /* hash value of name */
85 __uint16_t nameidx; /* index into buffer of name */
86 __uint8_t namelen; /* length of name string */
87 __uint8_t pad2;
88 } entries[1]; /* var sized array */
89 struct xfs_dir_leaf_name {
90 xfs_dir_ino_t inumber; /* inode number for this key */
91 __uint8_t name[1]; /* name string itself */
92 } namelist[1]; /* grows from bottom of buf */
93} xfs_dir_leafblock_t; 101} xfs_dir_leafblock_t;
94typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
95typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
96typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
97typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
98 102
99/* 103/*
100 * Length of name for which a 512-byte block filesystem 104 * Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
126#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \ 130#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \
127 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash)) 131 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
128 132
129typedef struct xfs_dir_put_args 133typedef struct xfs_dir_put_args {
130{
131 xfs_dircook_t cook; /* cookie of (next) entry */ 134 xfs_dircook_t cook; /* cookie of (next) entry */
132 xfs_intino_t ino; /* inode number */ 135 xfs_intino_t ino; /* inode number */
133 struct xfs_dirent *dbp; /* buffer pointer */ 136 struct xfs_dirent *dbp; /* buffer pointer */
134 char *name; /* directory entry name */ 137 char *name; /* directory entry name */
135 int namelen; /* length of name */ 138 int namelen; /* length of name */
136 int done; /* output: set if value was stored */ 139 int done; /* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
138 struct uio *uio; /* uio control structure */ 141 struct uio *uio; /* uio control structure */
139} xfs_dir_put_args_t; 142} xfs_dir_put_args_t;
140 143
141#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len) 144#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \
145 xfs_dir_leaf_entsize_byname(len)
142static inline int xfs_dir_leaf_entsize_byname(int len) 146static inline int xfs_dir_leaf_entsize_byname(int len)
143{ 147{
144 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len; 148 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
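
The xfs_dir_leaf.h hunk is a pure restructuring: the structs nested inside xfs_dir_leafblock move to file scope and are typedef'd before the containing block, so each component type can be named and sized on its own. The same transformation in miniature, with illustrative names:

/* before: members are declared with nested struct types, so the
 * typedefs only become usable after the whole outer definition */
struct leafblock_old {
	struct entry_old { int hash; } entries[1];
};

/* after: hoist and typedef the component first, then compose it --
 * what this hunk does to xfs_dir_leafblock_t */
typedef struct entry_new { int hash; } entry_new_t;

typedef struct leafblock_new {
	entry_new_t	entries[1];	/* var sized array */
} leafblock_new_t;
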
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index d7b6b5d16704..2a21c5024017 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -54,7 +54,6 @@ xfs_error_trap(int e)
54 if (e != xfs_etrap[i]) 54 if (e != xfs_etrap[i])
55 continue; 55 continue;
56 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); 56 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
57 debug_stop_all_cpus((void *)-1LL);
58 BUG(); 57 BUG();
59 break; 58 break;
60 } 59 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 06d8a8426c16..26b8e709a569 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,9 +18,6 @@
18#ifndef __XFS_ERROR_H__ 18#ifndef __XFS_ERROR_H__
19#define __XFS_ERROR_H__ 19#define __XFS_ERROR_H__
20 20
21#define prdev(fmt,targ,args...) \
22 printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
23
24#define XFS_ERECOVER 1 /* Failure to recover log */ 21#define XFS_ERECOVER 1 /* Failure to recover log */
25#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */ 22#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
26#define XFS_ENOLOGSPACE 3 /* Reservation too large */ 23#define XFS_ENOLOGSPACE 3 /* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
182struct xfs_mount; 179struct xfs_mount;
183/* PRINTFLIKE4 */ 180/* PRINTFLIKE4 */
184extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, 181extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
185 char *fmt, ...); 182 char *fmt, ...);
186/* PRINTFLIKE3 */ 183/* PRINTFLIKE3 */
187extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); 184extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
188 185
186#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
187 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
188
189#endif /* __XFS_ERROR_H__ */ 189#endif /* __XFS_ERROR_H__ */
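
The new xfs_fs_repair_cmn_err() leans on two preprocessor behaviours: adjacent string literals concatenate, which appends the repair advice to every caller's format string, and GNU C's ", ## args" drops the trailing comma when no variadic arguments are supplied. A standalone model of the same pattern — fs_cmn_err() below is a printf stand-in, not the kernel function:

#include <stdarg.h>
#include <stdio.h>

/* printf stand-in for xfs_fs_cmn_err(); level/mount arguments elided */
static void fs_cmn_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
	putchar('\n');
}

/* same construction as xfs_fs_repair_cmn_err() above; the named
 * variadic parameter and ", ## args" are GNU C extensions */
#define fs_repair_cmn_err(fmt, args...) \
	fs_cmn_err(fmt " Unmount and run xfs_repair.", ## args)

int main(void)
{
	fs_repair_cmn_err("corrupt dinode %llu, (btree extents).", 128ULL);
	return 0;
}
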
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48d..14010f1fa82f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -3,15 +3,15 @@
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU Lesser General Public License
7 * published by the Free Software Foundation. 7 * as published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU Lesser General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
@@ -65,6 +65,8 @@ struct fsxattr {
65#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ 65#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
66#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ 66#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ 67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
68#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
69#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
68#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ 70#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
69 71
70/* 72/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d1236d6f4045..163031c1e394 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -540,6 +540,32 @@ xfs_reserve_blocks(
540 return(0); 540 return(0);
541} 541}
542 542
543void
544xfs_fs_log_dummy(xfs_mount_t *mp)
545{
546 xfs_trans_t *tp;
547 xfs_inode_t *ip;
548
549
550 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
551 atomic_inc(&mp->m_active_trans);
552 if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
553 xfs_trans_cancel(tp, 0);
554 return;
555 }
556
557 ip = mp->m_rootip;
558 xfs_ilock(ip, XFS_ILOCK_EXCL);
559
560 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
561 xfs_trans_ihold(tp, ip);
562 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
563 xfs_trans_set_sync(tp);
564 xfs_trans_commit(tp, 0, NULL);
565
566 xfs_iunlock(ip, XFS_ILOCK_EXCL);
567}
568
543int 569int
544xfs_fs_goingdown( 570xfs_fs_goingdown(
545 xfs_mount_t *mp, 571 xfs_mount_t *mp,
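
xfs_fs_log_dummy() gives sync paths a way to push a small synchronous transaction through an otherwise idle log so the log can be covered. Its call site is not shown in this section; a plausible fragment, assuming the existing xfs_log_need_covered() predicate as the trigger:

	/* hypothetical sync-path fragment, not taken from this diff:
	 * nudge the log when it needs a covering record */
	if (xfs_log_need_covered(mp))
		xfs_fs_log_dummy(mp);
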
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f32713f14f9a..300d0c9d61ad 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern void xfs_fs_log_dummy(xfs_mount_t *mp);
28 29
29#endif /* __XFS_FSOPS_H__ */ 30#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fc19eedbd11b..8e380a1fb79b 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -493,7 +493,6 @@ xfs_iget(
493 493
494retry: 494retry:
495 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { 495 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
496 bhv_desc_t *bdp;
497 xfs_inode_t *ip; 496 xfs_inode_t *ip;
498 497
499 vp = LINVFS_GET_VP(inode); 498 vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
517 * to wait for the inode to go away. 516 * to wait for the inode to go away.
518 */ 517 */
519 if (is_bad_inode(inode) || 518 if (is_bad_inode(inode) ||
520 ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), 519 ((ip = xfs_vtoi(vp)) == NULL)) {
521 &xfs_vnodeops)) == NULL)) {
522 iput(inode); 520 iput(inode);
523 delay(1); 521 delay(1);
524 goto retry; 522 goto retry;
525 } 523 }
526 524
527 ip = XFS_BHVTOI(bdp);
528 if (lock_flags != 0) 525 if (lock_flags != 0)
529 xfs_ilock(ip, lock_flags); 526 xfs_ilock(ip, lock_flags);
530 XFS_STATS_INC(xs_ig_found); 527 XFS_STATS_INC(xs_ig_found);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70a..1d7f5a7e063e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -404,9 +404,8 @@ xfs_iformat(
404 INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + 404 INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
405 INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > 405 INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
406 INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) { 406 INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
407 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 407 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
408 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu." 408 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
409 " Unmount and run xfs_repair.",
410 (unsigned long long)ip->i_ino, 409 (unsigned long long)ip->i_ino,
411 (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) 410 (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
412 + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), 411 + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
418 } 417 }
419 418
420 if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) { 419 if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
421 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 420 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
422 "corrupt dinode %Lu, forkoff = 0x%x." 421 "corrupt dinode %Lu, forkoff = 0x%x.",
423 " Unmount and run xfs_repair.",
424 (unsigned long long)ip->i_ino, 422 (unsigned long long)ip->i_ino,
425 (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); 423 (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
426 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 424 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
451 * no local regular files yet 449 * no local regular files yet
452 */ 450 */
453 if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) { 451 if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
454 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 452 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
455 "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.", 453 "corrupt inode %Lu "
454 "(local format for regular file).",
456 (unsigned long long) ip->i_ino); 455 (unsigned long long) ip->i_ino);
457 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 456 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
458 XFS_ERRLEVEL_LOW, 457 XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
462 461
463 di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); 462 di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
464 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 463 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
465 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 464 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
466 "corrupt inode %Lu (bad size %Ld for local inode). Unmount and run xfs_repair.", 465 "corrupt inode %Lu "
466 "(bad size %Ld for local inode).",
467 (unsigned long long) ip->i_ino, 467 (unsigned long long) ip->i_ino,
468 (long long) di_size); 468 (long long) di_size);
469 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 469 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
551 * kmem_alloc() or memcpy() below. 551 * kmem_alloc() or memcpy() below.
552 */ 552 */
553 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 553 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
554 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 554 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
555 "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.", 555 "corrupt inode %Lu "
556 "(bad size %d for local fork, size = %d).",
556 (unsigned long long) ip->i_ino, size, 557 (unsigned long long) ip->i_ino, size,
557 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 558 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
558 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 559 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
610 * kmem_alloc() or memcpy() below. 611 * kmem_alloc() or memcpy() below.
611 */ 612 */
612 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 613 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
613 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 614 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
614 "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.", 615 "corrupt inode %Lu ((a)extents = %d).",
615 (unsigned long long) ip->i_ino, nex); 616 (unsigned long long) ip->i_ino, nex);
616 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 617 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
617 ip->i_mount, dip); 618 ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
692 || XFS_BMDR_SPACE_CALC(nrecs) > 693 || XFS_BMDR_SPACE_CALC(nrecs) >
693 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 694 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
694 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 695 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
695 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 696 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
696 "corrupt inode %Lu (btree). Unmount and run xfs_repair.", 697 "corrupt inode %Lu (btree).",
697 (unsigned long long) ip->i_ino); 698 (unsigned long long) ip->i_ino);
698 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 699 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
699 ip->i_mount); 700 ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
809 flags |= XFS_XFLAG_PROJINHERIT; 810 flags |= XFS_XFLAG_PROJINHERIT;
810 if (di_flags & XFS_DIFLAG_NOSYMLINKS) 811 if (di_flags & XFS_DIFLAG_NOSYMLINKS)
811 flags |= XFS_XFLAG_NOSYMLINKS; 812 flags |= XFS_XFLAG_NOSYMLINKS;
813 if (di_flags & XFS_DIFLAG_EXTSIZE)
814 flags |= XFS_XFLAG_EXTSIZE;
815 if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
816 flags |= XFS_XFLAG_EXTSZINHERIT;
812 } 817 }
813 818
814 return flags; 819 return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
1192 if ((mode & S_IFMT) == S_IFDIR) { 1197 if ((mode & S_IFMT) == S_IFDIR) {
1193 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1198 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1194 di_flags |= XFS_DIFLAG_RTINHERIT; 1199 di_flags |= XFS_DIFLAG_RTINHERIT;
1195 } else { 1200 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1201 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1202 ip->i_d.di_extsize = pip->i_d.di_extsize;
1203 }
1204 } else if ((mode & S_IFMT) == S_IFREG) {
1196 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { 1205 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
1197 di_flags |= XFS_DIFLAG_REALTIME; 1206 di_flags |= XFS_DIFLAG_REALTIME;
1198 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 1207 ip->i_iocore.io_flags |= XFS_IOCORE_RT;
1199 } 1208 }
1209 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1210 di_flags |= XFS_DIFLAG_EXTSIZE;
1211 ip->i_d.di_extsize = pip->i_d.di_extsize;
1212 }
1200 } 1213 }
1201 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 1214 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
1202 xfs_inherit_noatime) 1215 xfs_inherit_noatime)
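
The xfs_ialloc() hunk splits inheritance by file type: a new directory propagates the EXTSZINHERIT hint itself, while a new regular file converts it into a live EXTSIZE hint; both copy di_extsize from the parent. A standalone model of just that branch — the flag values mirror the xfs_dinode.h additions, everything else is illustrative:

#include <assert.h>

#define DIFLAG_EXTSIZE		(1 << 11)
#define DIFLAG_EXTSZINHERIT	(1 << 12)

struct idata { unsigned flags; unsigned extsize; };

/* model of the EXTSZINHERIT branch added to xfs_ialloc() */
static void inherit_extsz(const struct idata *pip, struct idata *ip, int is_dir)
{
	if (!(pip->flags & DIFLAG_EXTSZINHERIT))
		return;
	ip->flags |= is_dir ? DIFLAG_EXTSZINHERIT : DIFLAG_EXTSIZE;
	ip->extsize = pip->extsize;
}

int main(void)
{
	struct idata parent = { DIFLAG_EXTSZINHERIT, 16 };
	struct idata dir = { 0, 0 }, file = { 0, 0 };

	inherit_extsz(&parent, &dir, 1);	/* subdir keeps inheriting */
	inherit_extsz(&parent, &file, 0);	/* file gets a live hint */
	assert(dir.flags == DIFLAG_EXTSZINHERIT && dir.extsize == 16);
	assert(file.flags == DIFLAG_EXTSIZE && file.extsize == 16);
	return 0;
}
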
@@ -1262,7 +1275,7 @@ xfs_isize_check(
1262 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1275 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
1263 return; 1276 return;
1264 1277
1265 if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME ) 1278 if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
1266 return; 1279 return;
1267 1280
1268 nimaps = 2; 1281 nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
1765 xfs_fsize_t new_size, 1778 xfs_fsize_t new_size,
1766 cred_t *credp) 1779 cred_t *credp)
1767{ 1780{
1768 xfs_fsize_t isize;
1769 int error; 1781 int error;
1770 1782
1771 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1772 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1773 ASSERT(new_size > ip->i_d.di_size); 1785 ASSERT(new_size > ip->i_d.di_size);
1774 1786
1775 error = 0;
1776 isize = ip->i_d.di_size;
1777 /* 1787 /*
1778 * Zero any pages that may have been created by 1788 * Zero any pages that may have been created by
1779 * xfs_write_file() beyond the end of the file 1789 * xfs_write_file() beyond the end of the file
1780 * and any blocks between the old and new file sizes. 1790 * and any blocks between the old and new file sizes.
1781 */ 1791 */
1782 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize, 1792 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1783 new_size); 1793 ip->i_d.di_size, new_size);
1784 return error; 1794 return error;
1785} 1795}
1786 1796
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
3355 ip->i_update_core = 0; 3365 ip->i_update_core = 0;
3356 SYNCHRONIZE(); 3366 SYNCHRONIZE();
3357 3367
3368 /*
3369 * Make sure to get the latest atime from the Linux inode.
3370 */
3371 xfs_synchronize_atime(ip);
3372
3358 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC, 3373 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
3359 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3374 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
3360 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3375 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 124d30e6143b..1cfbcf18ce86 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -436,6 +436,10 @@ void xfs_ichgtime(xfs_inode_t *, int);
436xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 436xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
437void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 437void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
438 438
439xfs_inode_t *xfs_vtoi(struct vnode *vp);
440
441void xfs_synchronize_atime(xfs_inode_t *);
442
439#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 443#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
440 444
441#ifdef DEBUG 445#ifdef DEBUG
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7f3363c621e1..36aa1fcb90a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -271,6 +271,11 @@ xfs_inode_item_format(
271 if (ip->i_update_size) 271 if (ip->i_update_size)
272 ip->i_update_size = 0; 272 ip->i_update_size = 0;
273 273
274 /*
275 * Make sure to get the latest atime from the Linux inode.
276 */
277 xfs_synchronize_atime(ip);
278
274 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 279 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
275 vecp->i_len = sizeof(xfs_dinode_core_t); 280 vecp->i_len = sizeof(xfs_dinode_core_t);
276 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 281 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
603 if (iip->ili_pushbuf_flag == 0) { 608 if (iip->ili_pushbuf_flag == 0) {
604 iip->ili_pushbuf_flag = 1; 609 iip->ili_pushbuf_flag = 1;
605#ifdef DEBUG 610#ifdef DEBUG
606 iip->ili_push_owner = get_thread_id(); 611 iip->ili_push_owner = current_pid();
607#endif 612#endif
608 /* 613 /*
609 * Inode is left locked in shared mode. 614 * Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
782 * trying to duplicate our effort. 787 * trying to duplicate our effort.
783 */ 788 */
784 ASSERT(iip->ili_pushbuf_flag != 0); 789 ASSERT(iip->ili_pushbuf_flag != 0);
785 ASSERT(iip->ili_push_owner == get_thread_id()); 790 ASSERT(iip->ili_push_owner == current_pid());
786 791
787 /* 792 /*
788 * If flushlock isn't locked anymore, chances are that the 793 * If flushlock isn't locked anymore, chances are that the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ca7afc83a893..788917f355c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -262,7 +262,7 @@ phase2:
262 case BMAPI_WRITE: 262 case BMAPI_WRITE:
263 /* If we found an extent, return it */ 263 /* If we found an extent, return it */
264 if (nimaps && 264 if (nimaps &&
265 (imap.br_startblock != HOLESTARTBLOCK) && 265 (imap.br_startblock != HOLESTARTBLOCK) &&
266 (imap.br_startblock != DELAYSTARTBLOCK)) { 266 (imap.br_startblock != DELAYSTARTBLOCK)) {
267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, 267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
268 offset, count, iomapp, &imap, flags); 268 offset, count, iomapp, &imap, flags);
@@ -317,6 +317,58 @@ out:
317} 317}
318 318
319STATIC int 319STATIC int
320xfs_iomap_eof_align_last_fsb(
321 xfs_mount_t *mp,
322 xfs_iocore_t *io,
323 xfs_fsize_t isize,
324 xfs_extlen_t extsize,
325 xfs_fileoff_t *last_fsb)
326{
327 xfs_fileoff_t new_last_fsb = 0;
328 xfs_extlen_t align;
329 int eof, error;
330
331 if (io->io_flags & XFS_IOCORE_RT)
332 ;
333 /*
334 * If mounted with the "-o swalloc" option, roundup the allocation
335 * request to a stripe width boundary if the file size is >=
336 * stripe width and we are allocating past the allocation eof.
337 */
338 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
339 (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
340 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
341 /*
342 * Roundup the allocation request to a stripe unit (m_dalign) boundary
343 * if the file size is >= stripe unit size, and we are allocating past
344 * the allocation eof.
345 */
346 else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
347 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
348
349 /*
350 * Always round up the allocation request to an extent boundary
 351 * (when the file is on a real-time subvolume or has a di_extsize hint).
352 */
353 if (extsize) {
354 if (new_last_fsb)
355 align = roundup_64(new_last_fsb, extsize);
356 else
357 align = extsize;
358 new_last_fsb = roundup_64(*last_fsb, align);
359 }
360
361 if (new_last_fsb) {
362 error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
363 if (error)
364 return error;
365 if (eof)
366 *last_fsb = new_last_fsb;
367 }
368 return 0;
369}
370
371STATIC int
320xfs_flush_space( 372xfs_flush_space(
321 xfs_inode_t *ip, 373 xfs_inode_t *ip,
322 int *fsynced, 374 int *fsynced,
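
xfs_iomap_eof_align_last_fsb() replaces three open-coded rounding blocks with one ladder: stripe width (under "-o swalloc") beats stripe unit, and any extent-size hint is folded in last. A standalone model of the arithmetic, simplified in that it drops the file-size gating and the final still-past-EOF check the kernel performs:

#include <stdio.h>

typedef unsigned long long u64;

static u64 roundup_64(u64 x, u64 y)	/* round x up to a multiple of y */
{
	return ((x + y - 1) / y) * y;
}

/* model of the decision ladder in xfs_iomap_eof_align_last_fsb() */
static u64 eof_align(u64 last_fsb, u64 swidth, u64 dalign, u64 extsz)
{
	u64 new_last = 0;

	if (swidth)
		new_last = roundup_64(last_fsb, swidth);
	else if (dalign)
		new_last = roundup_64(last_fsb, dalign);

	if (extsz) {
		u64 align = new_last ? roundup_64(new_last, extsz) : extsz;

		new_last = roundup_64(last_fsb, align);
	}
	return new_last ? new_last : last_fsb;
}

int main(void)
{
	/* write ends at block 100; stripe unit 16, extent hint 24 */
	printf("%llu\n", eof_align(100, 0, 16, 24));	/* prints 120 */
	return 0;
}
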
@@ -362,19 +414,20 @@ xfs_iomap_write_direct(
362 xfs_iocore_t *io = &ip->i_iocore; 414 xfs_iocore_t *io = &ip->i_iocore;
363 xfs_fileoff_t offset_fsb; 415 xfs_fileoff_t offset_fsb;
364 xfs_fileoff_t last_fsb; 416 xfs_fileoff_t last_fsb;
365 xfs_filblks_t count_fsb; 417 xfs_filblks_t count_fsb, resaligned;
366 xfs_fsblock_t firstfsb; 418 xfs_fsblock_t firstfsb;
419 xfs_extlen_t extsz, temp;
420 xfs_fsize_t isize;
367 int nimaps; 421 int nimaps;
368 int error;
369 int bmapi_flag; 422 int bmapi_flag;
370 int quota_flag; 423 int quota_flag;
371 int rt; 424 int rt;
372 xfs_trans_t *tp; 425 xfs_trans_t *tp;
373 xfs_bmbt_irec_t imap; 426 xfs_bmbt_irec_t imap;
374 xfs_bmap_free_t free_list; 427 xfs_bmap_free_t free_list;
375 xfs_filblks_t qblocks, resblks; 428 uint qblocks, resblks, resrtextents;
376 int committed; 429 int committed;
377 int resrtextents; 430 int error;
378 431
379 /* 432 /*
380 * Make sure that the dquots are there. This doesn't hold 433 * Make sure that the dquots are there. This doesn't hold
@@ -384,37 +437,52 @@ xfs_iomap_write_direct(
384 if (error) 437 if (error)
385 return XFS_ERROR(error); 438 return XFS_ERROR(error);
386 439
387 offset_fsb = XFS_B_TO_FSBT(mp, offset); 440 rt = XFS_IS_REALTIME_INODE(ip);
388 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 441 if (unlikely(rt)) {
389 count_fsb = last_fsb - offset_fsb; 442 if (!(extsz = ip->i_d.di_extsize))
390 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) { 443 extsz = mp->m_sb.sb_rextsize;
391 xfs_fileoff_t map_last_fsb; 444 } else {
392 445 extsz = ip->i_d.di_extsize;
393 map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
394 if (map_last_fsb < last_fsb) {
395 last_fsb = map_last_fsb;
396 count_fsb = last_fsb - offset_fsb;
397 }
398 ASSERT(count_fsb > 0);
399 } 446 }
400 447
401 /* 448 isize = ip->i_d.di_size;
402 * Determine if reserving space on the data or realtime partition. 449 if (io->io_new_size > isize)
403 */ 450 isize = io->io_new_size;
404 if ((rt = XFS_IS_REALTIME_INODE(ip))) {
405 xfs_extlen_t extsz;
406 451
407 if (!(extsz = ip->i_d.di_extsize)) 452 offset_fsb = XFS_B_TO_FSBT(mp, offset);
408 extsz = mp->m_sb.sb_rextsize; 453 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
409 resrtextents = qblocks = (count_fsb + extsz - 1); 454 if ((offset + count) > isize) {
410 do_div(resrtextents, mp->m_sb.sb_rextsize); 455 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
411 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 456 &last_fsb);
412 quota_flag = XFS_QMOPT_RES_RTBLKS; 457 if (error)
458 goto error_out;
413 } else { 459 } else {
414 resrtextents = 0; 460 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
415 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); 461 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
416 quota_flag = XFS_QMOPT_RES_REGBLKS; 462 ret_imap->br_blockcount +
463 ret_imap->br_startoff);
417 } 464 }
465 count_fsb = last_fsb - offset_fsb;
466 ASSERT(count_fsb > 0);
467
468 resaligned = count_fsb;
469 if (unlikely(extsz)) {
470 if ((temp = do_mod(offset_fsb, extsz)))
471 resaligned += temp;
472 if ((temp = do_mod(resaligned, extsz)))
473 resaligned += extsz - temp;
474 }
475
476 if (unlikely(rt)) {
477 resrtextents = qblocks = resaligned;
478 resrtextents /= mp->m_sb.sb_rextsize;
479 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
480 quota_flag = XFS_QMOPT_RES_RTBLKS;
481 } else {
482 resrtextents = 0;
483 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
484 quota_flag = XFS_QMOPT_RES_REGBLKS;
485 }
418 486
419 /* 487 /*
420 * Allocate and setup the transaction 488 * Allocate and setup the transaction
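
The space and quota reservations above are now taken against the extsize-widened length rather than the raw block count. A worked model of the "resaligned" arithmetic — do_mod() in the kernel is a 64-bit-safe remainder, so plain % stands in for it here:

#include <assert.h>

typedef unsigned long long u64;

/* model of the "resaligned" computation in xfs_iomap_write_direct() */
static u64 resaligned_blocks(u64 offset_fsb, u64 count_fsb, u64 extsz)
{
	u64 resaligned = count_fsb, temp;

	if (extsz) {
		if ((temp = offset_fsb % extsz))
			resaligned += temp;		/* pad back to the boundary */
		if ((temp = resaligned % extsz))
			resaligned += extsz - temp;	/* pad forward to the boundary */
	}
	return resaligned;
}

int main(void)
{
	/* 10 blocks at offset 13 with an 8-block hint: the aligned
	 * allocation spans blocks 8..23, so 16 blocks are reserved */
	assert(resaligned_blocks(13, 10, 8) == 16);
	return 0;
}
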
@@ -425,7 +493,6 @@ xfs_iomap_write_direct(
425 XFS_WRITE_LOG_RES(mp), resrtextents, 493 XFS_WRITE_LOG_RES(mp), resrtextents,
426 XFS_TRANS_PERM_LOG_RES, 494 XFS_TRANS_PERM_LOG_RES,
427 XFS_WRITE_LOG_COUNT); 495 XFS_WRITE_LOG_COUNT);
428
429 /* 496 /*
430 * Check for running out of space, note: need lock to return 497 * Check for running out of space, note: need lock to return
431 */ 498 */
@@ -435,20 +502,20 @@ xfs_iomap_write_direct(
435 if (error) 502 if (error)
436 goto error_out; 503 goto error_out;
437 504
438 if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) { 505 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
439 error = (EDQUOT); 506 qblocks, 0, quota_flag);
507 if (error)
440 goto error1; 508 goto error1;
441 }
442 509
443 bmapi_flag = XFS_BMAPI_WRITE;
444 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 510 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
445 xfs_trans_ihold(tp, ip); 511 xfs_trans_ihold(tp, ip);
446 512
447 if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt)) 513 bmapi_flag = XFS_BMAPI_WRITE;
514 if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
448 bmapi_flag |= XFS_BMAPI_PREALLOC; 515 bmapi_flag |= XFS_BMAPI_PREALLOC;
449 516
450 /* 517 /*
451 * Issue the bmapi() call to allocate the blocks 518 * Issue the xfs_bmapi() call to allocate the blocks
452 */ 519 */
453 XFS_BMAP_INIT(&free_list, &firstfsb); 520 XFS_BMAP_INIT(&free_list, &firstfsb);
454 nimaps = 1; 521 nimaps = 1;
@@ -483,8 +550,10 @@ xfs_iomap_write_direct(
483 "extent-state : %x \n", 550 "extent-state : %x \n",
484 (ip->i_mount)->m_fsname, 551 (ip->i_mount)->m_fsname,
485 (long long)ip->i_ino, 552 (long long)ip->i_ino,
486 ret_imap->br_startblock, ret_imap->br_startoff, 553 (unsigned long long)ret_imap->br_startblock,
487 ret_imap->br_blockcount,ret_imap->br_state); 554 (unsigned long long)ret_imap->br_startoff,
555 (unsigned long long)ret_imap->br_blockcount,
556 ret_imap->br_state);
488 } 557 }
489 return 0; 558 return 0;
490 559
@@ -500,6 +569,63 @@ error_out:
500 return XFS_ERROR(error); 569 return XFS_ERROR(error);
501} 570}
502 571
572/*
573 * If the caller is doing a write at the end of the file,
574 * then extend the allocation out to the file system's write
575 * iosize. We clean up any extra space left over when the
576 * file is closed in xfs_inactive().
577 *
578 * For sync writes, we are flushing delayed allocate space to
579 * try to make additional space available for allocation near
580 * the filesystem full boundary - preallocation hurts in that
581 * situation, of course.
582 */
583STATIC int
584xfs_iomap_eof_want_preallocate(
585 xfs_mount_t *mp,
586 xfs_iocore_t *io,
587 xfs_fsize_t isize,
588 xfs_off_t offset,
589 size_t count,
590 int ioflag,
591 xfs_bmbt_irec_t *imap,
592 int nimaps,
593 int *prealloc)
594{
595 xfs_fileoff_t start_fsb;
596 xfs_filblks_t count_fsb;
597 xfs_fsblock_t firstblock;
598 int n, error, imaps;
599
600 *prealloc = 0;
601 if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
602 return 0;
603
604 /*
605 * If there are any real blocks past eof, then don't
606 * do any speculative allocation.
607 */
608 start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
609 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
610 while (count_fsb > 0) {
611 imaps = nimaps;
612 firstblock = NULLFSBLOCK;
613 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
614 0, &firstblock, 0, imap, &imaps, NULL);
615 if (error)
616 return error;
617 for (n = 0; n < imaps; n++) {
618 if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
619 (imap[n].br_startblock != DELAYSTARTBLOCK))
620 return 0;
621 start_fsb += imap[n].br_blockcount;
622 count_fsb -= imap[n].br_blockcount;
623 }
624 }
625 *prealloc = 1;
626 return 0;
627}
628
503int 629int
504xfs_iomap_write_delay( 630xfs_iomap_write_delay(
505 xfs_inode_t *ip, 631 xfs_inode_t *ip,
@@ -513,13 +639,15 @@ xfs_iomap_write_delay(
513 xfs_iocore_t *io = &ip->i_iocore; 639 xfs_iocore_t *io = &ip->i_iocore;
514 xfs_fileoff_t offset_fsb; 640 xfs_fileoff_t offset_fsb;
515 xfs_fileoff_t last_fsb; 641 xfs_fileoff_t last_fsb;
516 xfs_fsize_t isize; 642 xfs_off_t aligned_offset;
643 xfs_fileoff_t ioalign;
517 xfs_fsblock_t firstblock; 644 xfs_fsblock_t firstblock;
645 xfs_extlen_t extsz;
646 xfs_fsize_t isize;
518 int nimaps; 647 int nimaps;
519 int error;
520 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 648 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
521 int aeof; 649 int prealloc, fsynced = 0;
522 int fsynced = 0; 650 int error;
523 651
524 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 652 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
525 653
@@ -527,152 +655,57 @@ xfs_iomap_write_delay(
527 * Make sure that the dquots are there. This doesn't hold 655 * Make sure that the dquots are there. This doesn't hold
528 * the ilock across a disk read. 656 * the ilock across a disk read.
529 */ 657 */
530
531 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); 658 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
532 if (error) 659 if (error)
533 return XFS_ERROR(error); 660 return XFS_ERROR(error);
534 661
662 if (XFS_IS_REALTIME_INODE(ip)) {
663 if (!(extsz = ip->i_d.di_extsize))
664 extsz = mp->m_sb.sb_rextsize;
665 } else {
666 extsz = ip->i_d.di_extsize;
667 }
668
669 offset_fsb = XFS_B_TO_FSBT(mp, offset);
670
535retry: 671retry:
536 isize = ip->i_d.di_size; 672 isize = ip->i_d.di_size;
537 if (io->io_new_size > isize) { 673 if (io->io_new_size > isize)
538 isize = io->io_new_size; 674 isize = io->io_new_size;
539 }
540 675
541 aeof = 0; 676 error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
542 offset_fsb = XFS_B_TO_FSBT(mp, offset); 677 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
543 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 678 if (error)
544 /* 679 return error;
545 * If the caller is doing a write at the end of the file,
546 * then extend the allocation (and the buffer used for the write)
547 * out to the file system's write iosize. We clean up any extra
548 * space left over when the file is closed in xfs_inactive().
549 *
550 * For sync writes, we are flushing delayed allocate space to
551 * try to make additional space available for allocation near
552 * the filesystem full boundary - preallocation hurts in that
553 * situation, of course.
554 */
555 if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
556 xfs_off_t aligned_offset;
557 xfs_filblks_t count_fsb;
558 unsigned int iosize;
559 xfs_fileoff_t ioalign;
560 int n;
561 xfs_fileoff_t start_fsb;
562 680
563 /* 681 if (prealloc) {
564 * If there are any real blocks past eof, then don't
565 * do any speculative allocation.
566 */
567 start_fsb = XFS_B_TO_FSBT(mp,
568 ((xfs_ufsize_t)(offset + count - 1)));
569 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
570 while (count_fsb > 0) {
571 nimaps = XFS_WRITE_IMAPS;
572 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
573 0, &firstblock, 0, imap, &nimaps, NULL);
574 if (error) {
575 return error;
576 }
577 for (n = 0; n < nimaps; n++) {
578 if ( !(io->io_flags & XFS_IOCORE_RT) &&
579 !imap[n].br_startblock) {
580 cmn_err(CE_PANIC,"Access to block "
581 "zero: fs <%s> inode: %lld "
582 "start_block : %llx start_off "
583 ": %llx blkcnt : %llx "
584 "extent-state : %x \n",
585 (ip->i_mount)->m_fsname,
586 (long long)ip->i_ino,
587 imap[n].br_startblock,
588 imap[n].br_startoff,
589 imap[n].br_blockcount,
590 imap[n].br_state);
591 }
592 if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
593 (imap[n].br_startblock != DELAYSTARTBLOCK)) {
594 goto write_map;
595 }
596 start_fsb += imap[n].br_blockcount;
597 count_fsb -= imap[n].br_blockcount;
598 }
599 }
600 iosize = mp->m_writeio_blocks;
601 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 682 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
602 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 683 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
603 last_fsb = ioalign + iosize; 684 last_fsb = ioalign + mp->m_writeio_blocks;
604 aeof = 1; 685 } else {
686 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
605 } 687 }
606write_map:
607 nimaps = XFS_WRITE_IMAPS;
608 firstblock = NULLFSBLOCK;
609 688
610 /* 689 if (prealloc || extsz) {
611 * If mounted with the "-o swalloc" option, roundup the allocation 690 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
612 * request to a stripe width boundary if the file size is >= 691 &last_fsb);
613 * stripe width and we are allocating past the allocation eof. 692 if (error)
614 */
615 if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth
616 && (mp->m_flags & XFS_MOUNT_SWALLOC)
617 && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
618 int eof;
619 xfs_fileoff_t new_last_fsb;
620
621 new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
622 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
623 if (error) {
624 return error;
625 }
626 if (eof) {
627 last_fsb = new_last_fsb;
628 }
629 /*
630 * Roundup the allocation request to a stripe unit (m_dalign) boundary
631 * if the file size is >= stripe unit size, and we are allocating past
632 * the allocation eof.
633 */
634 } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
635 (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
636 int eof;
637 xfs_fileoff_t new_last_fsb;
638 new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
639 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
640 if (error) {
641 return error;
642 }
643 if (eof) {
644 last_fsb = new_last_fsb;
645 }
646 /*
647 * Round up the allocation request to a real-time extent boundary
648 * if the file is on the real-time subvolume.
649 */
650 } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
651 int eof;
652 xfs_fileoff_t new_last_fsb;
653
654 new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
655 error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
656 if (error) {
657 return error; 693 return error;
658 }
659 if (eof)
660 last_fsb = new_last_fsb;
661 } 694 }
695
696 nimaps = XFS_WRITE_IMAPS;
697 firstblock = NULLFSBLOCK;
662 error = xfs_bmapi(NULL, ip, offset_fsb, 698 error = xfs_bmapi(NULL, ip, offset_fsb,
663 (xfs_filblks_t)(last_fsb - offset_fsb), 699 (xfs_filblks_t)(last_fsb - offset_fsb),
664 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 700 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
665 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 701 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
666 &nimaps, NULL); 702 &nimaps, NULL);
667 /* 703 if (error && (error != ENOSPC))
668 * This can be EDQUOT, if nimaps == 0
669 */
670 if (error && (error != ENOSPC)) {
671 return XFS_ERROR(error); 704 return XFS_ERROR(error);
672 } 705
673 /* 706 /*
674 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 707 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
675 * then we must have run out of space. 708 * then we must have run out of space - flush delalloc and retry.
676 */ 709 */
677 if (nimaps == 0) { 710 if (nimaps == 0) {
678 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 711 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -684,17 +717,21 @@ write_map:
684 goto retry; 717 goto retry;
685 } 718 }
686 719
687 *ret_imap = imap[0]; 720 if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
688 *nmaps = 1;
689 if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
690 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " 721 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
691 "start_block : %llx start_off : %llx blkcnt : %llx " 722 "start_block : %llx start_off : %llx blkcnt : %llx "
692 "extent-state : %x \n", 723 "extent-state : %x \n",
693 (ip->i_mount)->m_fsname, 724 (ip->i_mount)->m_fsname,
694 (long long)ip->i_ino, 725 (long long)ip->i_ino,
695 ret_imap->br_startblock, ret_imap->br_startoff, 726 (unsigned long long)ret_imap->br_startblock,
696 ret_imap->br_blockcount,ret_imap->br_state); 727 (unsigned long long)ret_imap->br_startoff,
728 (unsigned long long)ret_imap->br_blockcount,
729 ret_imap->br_state);
697 } 730 }
731
732 *ret_imap = imap[0];
733 *nmaps = 1;
734
698 return 0; 735 return 0;
699} 736}
700 737
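When preallocation is wanted, the rewritten xfs_iomap_write_delay rounds the end of the delayed allocation out to the mount's write-I/O size: offset+count-1 is aligned down to a writeio boundary and the mapping is extended m_writeio_blocks beyond it. A sketch of that arithmetic, assuming (as in practice) a power-of-two write iosize so alignment is a mask:

    #include <stdio.h>
    #include <stdint.h>

    /* Align x down to a power-of-two boundary; this is the effect of
     * XFS_WRITEIO_ALIGN, with writeio_log the log2 of the iosize. */
    static uint64_t writeio_align(uint64_t x, unsigned writeio_log)
    {
            return x & ~((1ULL << writeio_log) - 1);
    }

    int main(void)
    {
            unsigned writeio_log = 16;        /* 64 KiB write iosize */
            unsigned blocklog = 12;           /* 4 KiB fs blocks     */
            uint64_t offset = 100000, count = 500;

            uint64_t aligned  = writeio_align(offset + count - 1, writeio_log);
            uint64_t ioalign  = aligned >> blocklog;       /* bytes -> fsb */
            uint64_t last_fsb = ioalign + (1ULL << (writeio_log - blocklog));

            /* prints 32: the delalloc extent is pushed out to 128 KiB */
            printf("last_fsb %llu\n", (unsigned long long)last_fsb);
            return 0;
    }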
@@ -820,17 +857,21 @@ xfs_iomap_write_allocate(
820 */ 857 */
821 858
822 for (i = 0; i < nimaps; i++) { 859 for (i = 0; i < nimaps; i++) {
823 if ( !(io->io_flags & XFS_IOCORE_RT) && 860 if (!(io->io_flags & XFS_IOCORE_RT) &&
824 !imap[i].br_startblock) { 861 !imap[i].br_startblock) {
825 cmn_err(CE_PANIC,"Access to block zero: " 862 cmn_err(CE_PANIC,"Access to block zero: "
826 "fs <%s> inode: %lld " 863 "fs <%s> inode: %lld "
827 "start_block : %llx start_off : %llx " 864 "start_block : %llx start_off : %llx "
828 "blkcnt : %llx extent-state : %x \n", 865 "blkcnt : %llx extent-state : %x \n",
829 (ip->i_mount)->m_fsname, 866 (ip->i_mount)->m_fsname,
830 (long long)ip->i_ino, 867 (long long)ip->i_ino,
831 imap[i].br_startblock, 868 (unsigned long long)
832 imap[i].br_startoff, 869 imap[i].br_startblock,
833 imap[i].br_blockcount,imap[i].br_state); 870 (unsigned long long)
871 imap[i].br_startoff,
872 (unsigned long long)
873 imap[i].br_blockcount,
874 imap[i].br_state);
834 } 875 }
835 if ((offset_fsb >= imap[i].br_startoff) && 876 if ((offset_fsb >= imap[i].br_startoff) &&
836 (offset_fsb < (imap[i].br_startoff + 877 (offset_fsb < (imap[i].br_startoff +
@@ -867,17 +908,17 @@ xfs_iomap_write_unwritten(
867{ 908{
868 xfs_mount_t *mp = ip->i_mount; 909 xfs_mount_t *mp = ip->i_mount;
869 xfs_iocore_t *io = &ip->i_iocore; 910 xfs_iocore_t *io = &ip->i_iocore;
870 xfs_trans_t *tp;
871 xfs_fileoff_t offset_fsb; 911 xfs_fileoff_t offset_fsb;
872 xfs_filblks_t count_fsb; 912 xfs_filblks_t count_fsb;
873 xfs_filblks_t numblks_fsb; 913 xfs_filblks_t numblks_fsb;
874 xfs_bmbt_irec_t imap; 914 xfs_fsblock_t firstfsb;
915 int nimaps;
916 xfs_trans_t *tp;
917 xfs_bmbt_irec_t imap;
918 xfs_bmap_free_t free_list;
919 uint resblks;
875 int committed; 920 int committed;
876 int error; 921 int error;
877 int nres;
878 int nimaps;
879 xfs_fsblock_t firstfsb;
880 xfs_bmap_free_t free_list;
881 922
882 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, 923 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
883 &ip->i_iocore, offset, count); 924 &ip->i_iocore, offset, count);
@@ -886,9 +927,9 @@ xfs_iomap_write_unwritten(
886 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 927 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
887 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); 928 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
888 929
889 do { 930 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
890 nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
891 931
932 do {
892 /* 933 /*
893 * set up a transaction to convert the range of extents 934 * set up a transaction to convert the range of extents
894 * from unwritten to real. Do allocations in a loop until 935 * from unwritten to real. Do allocations in a loop until
@@ -896,7 +937,7 @@ xfs_iomap_write_unwritten(
896 */ 937 */
897 938
898 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 939 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
899 error = xfs_trans_reserve(tp, nres, 940 error = xfs_trans_reserve(tp, resblks,
900 XFS_WRITE_LOG_RES(mp), 0, 941 XFS_WRITE_LOG_RES(mp), 0,
901 XFS_TRANS_PERM_LOG_RES, 942 XFS_TRANS_PERM_LOG_RES,
902 XFS_WRITE_LOG_COUNT); 943 XFS_WRITE_LOG_COUNT);
@@ -915,7 +956,7 @@ xfs_iomap_write_unwritten(
915 XFS_BMAP_INIT(&free_list, &firstfsb); 956 XFS_BMAP_INIT(&free_list, &firstfsb);
916 nimaps = 1; 957 nimaps = 1;
917 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 958 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
918 XFS_BMAPI_WRITE, &firstfsb, 959 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
919 1, &imap, &nimaps, &free_list); 960 1, &imap, &nimaps, &free_list);
920 if (error) 961 if (error)
921 goto error_on_bmapi_transaction; 962 goto error_on_bmapi_transaction;
@@ -929,15 +970,17 @@ xfs_iomap_write_unwritten(
929 xfs_iunlock(ip, XFS_ILOCK_EXCL); 970 xfs_iunlock(ip, XFS_ILOCK_EXCL);
930 if (error) 971 if (error)
931 goto error0; 972 goto error0;
932 973
933 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) { 974 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {
934 cmn_err(CE_PANIC,"Access to block zero: fs <%s> " 975 cmn_err(CE_PANIC,"Access to block zero: fs <%s> "
935 "inode: %lld start_block : %llx start_off : " 976 "inode: %lld start_block : %llx start_off : "
936 "%llx blkcnt : %llx extent-state : %x \n", 977 "%llx blkcnt : %llx extent-state : %x \n",
937 (ip->i_mount)->m_fsname, 978 (ip->i_mount)->m_fsname,
938 (long long)ip->i_ino, 979 (long long)ip->i_ino,
939 imap.br_startblock,imap.br_startoff, 980 (unsigned long long)imap.br_startblock,
940 imap.br_blockcount,imap.br_state); 981 (unsigned long long)imap.br_startoff,
982 (unsigned long long)imap.br_blockcount,
983 imap.br_state);
941 } 984 }
942 985
943 if ((numblks_fsb = imap.br_blockcount) == 0) { 986 if ((numblks_fsb = imap.br_blockcount) == 0) {
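xfs_iomap_write_unwritten drives the conversion as a loop of small transactions: each pass reserves log space, calls xfs_bmapi with the new XFS_BMAPI_CONVERT flag for at most one mapping, commits, and advances by however many blocks were actually converted until the whole range is done. A schematic of that drive loop, with the reserve/bmapi/commit cycle collapsed into a stub that handles a bounded chunk per pass:

    #include <stdio.h>

    /* Stub for one reserve/bmapi/commit cycle: converts up to 'max'
     * blocks starting at 'off', returns blocks handled (0 on error). */
    static long long convert_one_chunk(long long off, long long max)
    {
            long long done = max > 4 ? 4 : max;  /* say, 4 blocks/trans */
            printf("converted fsb %lld..%lld\n", off, off + done - 1);
            return done;
    }

    static int write_unwritten(long long offset_fsb, long long count_fsb)
    {
            while (count_fsb > 0) {
                    long long numblks_fsb =
                            convert_one_chunk(offset_fsb, count_fsb);
                    if (numblks_fsb == 0)
                            return -1;               /* error path */
                    offset_fsb += numblks_fsb;       /* skip converted */
                    count_fsb  -= numblks_fsb;
            }
            return 0;
    }

    int main(void)
    {
            return write_unwritten(10, 10);
    }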
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f63646ead816..c59450e1be40 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
56{ 56{
57 xfs_dinode_core_t *dic; /* dinode core info pointer */ 57 xfs_dinode_core_t *dic; /* dinode core info pointer */
58 xfs_inode_t *ip; /* incore inode pointer */ 58 xfs_inode_t *ip; /* incore inode pointer */
59 vnode_t *vp;
59 int error; 60 int error;
60 61
61 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); 62 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
72 goto out_iput; 73 goto out_iput;
73 } 74 }
74 75
76 vp = XFS_ITOV(ip);
75 dic = &ip->i_d; 77 dic = &ip->i_d;
76 78
77 /* xfs_iget returns the following without needing 79 /* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
84 buf->bs_uid = dic->di_uid; 86 buf->bs_uid = dic->di_uid;
85 buf->bs_gid = dic->di_gid; 87 buf->bs_gid = dic->di_gid;
86 buf->bs_size = dic->di_size; 88 buf->bs_size = dic->di_size;
87 buf->bs_atime.tv_sec = dic->di_atime.t_sec; 89 vn_atime_to_bstime(vp, &buf->bs_atime);
88 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
89 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 90 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
90 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 91 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
91 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 92 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 29af51275ca9..3d9a36e77363 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
178#define xlog_trace_iclog(iclog,state) 178#define xlog_trace_iclog(iclog,state)
179#endif /* XFS_LOG_TRACE */ 179#endif /* XFS_LOG_TRACE */
180 180
181
182static void
183xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
184{
185 if (*qp) {
186 tic->t_next = (*qp);
187 tic->t_prev = (*qp)->t_prev;
188 (*qp)->t_prev->t_next = tic;
189 (*qp)->t_prev = tic;
190 } else {
191 tic->t_prev = tic->t_next = tic;
192 *qp = tic;
193 }
194
195 tic->t_flags |= XLOG_TIC_IN_Q;
196}
197
198static void
199xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
200{
201 if (tic == tic->t_next) {
202 *qp = NULL;
203 } else {
204 *qp = tic->t_next;
205 tic->t_next->t_prev = tic->t_prev;
206 tic->t_prev->t_next = tic->t_next;
207 }
208
209 tic->t_next = tic->t_prev = NULL;
210 tic->t_flags &= ~XLOG_TIC_IN_Q;
211}
212
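The XLOG_INS_TICKETQ/XLOG_DEL_TICKETQ macros (deleted from xfs_log_priv.h later in this series) become the two functions above. The wait queue is a circular doubly-linked list whose head pointer names an arbitrary element, so inserting "before the head" is effectively queueing at the tail, and deletion always repoints the head at the victim's successor. A self-contained sketch with a reduced ticket type:

    #include <stdio.h>
    #include <stddef.h>

    struct tic {                    /* reduced xlog_ticket: links only */
            struct tic *t_next, *t_prev;
            int id;
    };

    static void ins_ticketq(struct tic **qp, struct tic *tic)
    {
            if (*qp) {              /* splice in just before the head */
                    tic->t_next = *qp;
                    tic->t_prev = (*qp)->t_prev;
                    (*qp)->t_prev->t_next = tic;
                    (*qp)->t_prev = tic;
            } else {                /* empty: self-linked singleton */
                    tic->t_prev = tic->t_next = tic;
                    *qp = tic;
            }
    }

    static void del_ticketq(struct tic **qp, struct tic *tic)
    {
            if (tic == tic->t_next) {
                    *qp = NULL;                   /* queue is empty now */
            } else {
                    *qp = tic->t_next;            /* head -> successor  */
                    tic->t_next->t_prev = tic->t_prev;
                    tic->t_prev->t_next = tic->t_next;
            }
            tic->t_next = tic->t_prev = NULL;
    }

    int main(void)
    {
            struct tic a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
            struct tic *q = NULL, *p;

            ins_ticketq(&q, &a);
            ins_ticketq(&q, &b);
            ins_ticketq(&q, &c);
            del_ticketq(&q, &b);

            p = q;                  /* one lap around the ring: "3 1" */
            do {
                    printf("%d ", p->id);
                    p = p->t_next;
            } while (p != q);
            printf("\n");
            return 0;
    }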
213static void
214xlog_grant_sub_space(struct log *log, int bytes)
215{
216 log->l_grant_write_bytes -= bytes;
217 if (log->l_grant_write_bytes < 0) {
218 log->l_grant_write_bytes += log->l_logsize;
219 log->l_grant_write_cycle--;
220 }
221
222 log->l_grant_reserve_bytes -= bytes;
223 if (log->l_grant_reserve_bytes < 0) {
224 log->l_grant_reserve_bytes += log->l_logsize;
225 log->l_grant_reserve_cycle--;
226 }
227
228}
229
230static void
231xlog_grant_add_space_write(struct log *log, int bytes)
232{
233 log->l_grant_write_bytes += bytes;
234 if (log->l_grant_write_bytes > log->l_logsize) {
235 log->l_grant_write_bytes -= log->l_logsize;
236 log->l_grant_write_cycle++;
237 }
238}
239
240static void
241xlog_grant_add_space_reserve(struct log *log, int bytes)
242{
243 log->l_grant_reserve_bytes += bytes;
244 if (log->l_grant_reserve_bytes > log->l_logsize) {
245 log->l_grant_reserve_bytes -= log->l_logsize;
246 log->l_grant_reserve_cycle++;
247 }
248}
249
250static inline void
251xlog_grant_add_space(struct log *log, int bytes)
252{
253 xlog_grant_add_space_write(log, bytes);
254 xlog_grant_add_space_reserve(log, bytes);
255}
256
257
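Likewise, xlog_grant_sub_space and the xlog_grant_add_space_* helpers replace the open-coded XLOG_GRANT_{ADD,SUB}_SPACE macros and their 'w'/'r' selector. Each grant head is a (cycle, bytes) position on the circular log, so reserving and releasing space is modular arithmetic in log-size units; a standalone sketch of the wrap logic, assuming (as the single-step wrap implies) that one adjustment never exceeds the log size:

    #include <stdio.h>

    struct grant_head {           /* one (cycle, bytes) grant head */
            int cycle;
            int bytes;
    };

    static void grant_add(struct grant_head *h, int logsize, int nbytes)
    {
            h->bytes += nbytes;
            if (h->bytes > logsize) {     /* wrapped past the end */
                    h->bytes -= logsize;
                    h->cycle++;
            }
    }

    static void grant_sub(struct grant_head *h, int logsize, int nbytes)
    {
            h->bytes -= nbytes;
            if (h->bytes < 0) {           /* wrapped back past the start */
                    h->bytes += logsize;
                    h->cycle--;
            }
    }

    int main(void)
    {
            struct grant_head w = { .cycle = 1, .bytes = 900 };
            int logsize = 1024;

            grant_add(&w, logsize, 200);  /* 1100 > 1024: cycle 2, 76 */
            printf("cycle %d bytes %d\n", w.cycle, w.bytes);
            grant_sub(&w, logsize, 100);  /* back to cycle 1, 1000    */
            printf("cycle %d bytes %d\n", w.cycle, w.bytes);
            return 0;
    }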
181/* 258/*
182 * NOTES: 259 * NOTES:
183 * 260 *
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t *mp,
428 if (readonly) 505 if (readonly)
429 vfsp->vfs_flag &= ~VFS_RDONLY; 506 vfsp->vfs_flag &= ~VFS_RDONLY;
430 507
431 error = xlog_recover(mp->m_log, readonly); 508 error = xlog_recover(mp->m_log);
432 509
433 if (readonly) 510 if (readonly)
434 vfsp->vfs_flag |= VFS_RDONLY; 511 vfsp->vfs_flag |= VFS_RDONLY;
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t *log,
1320 1397
1321 /* move grant heads by roundoff in sync */ 1398 /* move grant heads by roundoff in sync */
1322 s = GRANT_LOCK(log); 1399 s = GRANT_LOCK(log);
1323 XLOG_GRANT_ADD_SPACE(log, roundoff, 'w'); 1400 xlog_grant_add_space(log, roundoff);
1324 XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
1325 GRANT_UNLOCK(log, s); 1401 GRANT_UNLOCK(log, s);
1326 1402
1327 /* put cycle number in every block */ 1403 /* put cycle number in every block */
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t *log,
1515 * print out info relating to regions written which consume 1591 * print out info relating to regions written which consume
1516 * the reservation 1592 * the reservation
1517 */ 1593 */
1518#if defined(XFS_LOG_RES_DEBUG)
1519STATIC void 1594STATIC void
1520xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) 1595xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1521{ 1596{
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1605 ticket->t_res_arr_sum, ticket->t_res_o_flow, 1680 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1606 ticket->t_res_num_ophdrs, ophdr_spc, 1681 ticket->t_res_num_ophdrs, ophdr_spc,
1607 ticket->t_res_arr_sum + 1682 ticket->t_res_arr_sum +
1608 ticket->t_res_o_flow + ophdr_spc, 1683 ticket->t_res_o_flow + ophdr_spc,
1609 ticket->t_res_num); 1684 ticket->t_res_num);
1610 1685
1611 for (i = 0; i < ticket->t_res_num; i++) { 1686 for (i = 0; i < ticket->t_res_num; i++) {
1612 uint r_type = ticket->t_res_arr[i].r_type; 1687 uint r_type = ticket->t_res_arr[i].r_type;
1613 cmn_err(CE_WARN, 1688 cmn_err(CE_WARN,
1614 "region[%u]: %s - %u bytes\n", 1689 "region[%u]: %s - %u bytes\n",
1615 i, 1690 i,
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1618 ticket->t_res_arr[i].r_len); 1693 ticket->t_res_arr[i].r_len);
1619 } 1694 }
1620} 1695}
1621#else
1622#define xlog_print_tic_res(mp, ticket)
1623#endif
1624 1696
1625/* 1697/*
1626 * Write some region out to in-core log 1698 * Write some region out to in-core log
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t *log,
2389 2461
2390 /* something is already sleeping; insert new transaction at end */ 2462 /* something is already sleeping; insert new transaction at end */
2391 if (log->l_reserve_headq) { 2463 if (log->l_reserve_headq) {
2392 XLOG_INS_TICKETQ(log->l_reserve_headq, tic); 2464 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2393 xlog_trace_loggrant(log, tic, 2465 xlog_trace_loggrant(log, tic,
2394 "xlog_grant_log_space: sleep 1"); 2466 "xlog_grant_log_space: sleep 1");
2395 /* 2467 /*
@@ -2422,7 +2494,7 @@ redo:
2422 log->l_grant_reserve_bytes); 2494 log->l_grant_reserve_bytes);
2423 if (free_bytes < need_bytes) { 2495 if (free_bytes < need_bytes) {
2424 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2496 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2425 XLOG_INS_TICKETQ(log->l_reserve_headq, tic); 2497 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2426 xlog_trace_loggrant(log, tic, 2498 xlog_trace_loggrant(log, tic,
2427 "xlog_grant_log_space: sleep 2"); 2499 "xlog_grant_log_space: sleep 2");
2428 XFS_STATS_INC(xs_sleep_logspace); 2500 XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
2439 s = GRANT_LOCK(log); 2511 s = GRANT_LOCK(log);
2440 goto redo; 2512 goto redo;
2441 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2513 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2442 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2514 xlog_del_ticketq(&log->l_reserve_headq, tic);
2443 2515
2444 /* we've got enough space */ 2516 /* we've got enough space */
2445 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); 2517 xlog_grant_add_space(log, need_bytes);
2446 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
2447#ifdef DEBUG 2518#ifdef DEBUG
2448 tail_lsn = log->l_tail_lsn; 2519 tail_lsn = log->l_tail_lsn;
2449 /* 2520 /*
@@ -2464,7 +2535,7 @@ redo:
2464 2535
2465 error_return: 2536 error_return:
2466 if (tic->t_flags & XLOG_TIC_IN_Q) 2537 if (tic->t_flags & XLOG_TIC_IN_Q)
2467 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2538 xlog_del_ticketq(&log->l_reserve_headq, tic);
2468 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); 2539 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
2469 /* 2540 /*
2470 * If we are failing, make sure the ticket doesn't have any 2541 * If we are failing, make sure the ticket doesn't have any
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2533 2604
2534 if (ntic != log->l_write_headq) { 2605 if (ntic != log->l_write_headq) {
2535 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2606 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2536 XLOG_INS_TICKETQ(log->l_write_headq, tic); 2607 xlog_ins_ticketq(&log->l_write_headq, tic);
2537 2608
2538 xlog_trace_loggrant(log, tic, 2609 xlog_trace_loggrant(log, tic,
2539 "xlog_regrant_write_log_space: sleep 1"); 2610 "xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
2565 log->l_grant_write_bytes); 2636 log->l_grant_write_bytes);
2566 if (free_bytes < need_bytes) { 2637 if (free_bytes < need_bytes) {
2567 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2638 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2568 XLOG_INS_TICKETQ(log->l_write_headq, tic); 2639 xlog_ins_ticketq(&log->l_write_headq, tic);
2569 XFS_STATS_INC(xs_sleep_logspace); 2640 XFS_STATS_INC(xs_sleep_logspace);
2570 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2641 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
2571 2642
@@ -2581,9 +2652,10 @@ redo:
2581 s = GRANT_LOCK(log); 2652 s = GRANT_LOCK(log);
2582 goto redo; 2653 goto redo;
2583 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2654 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2584 XLOG_DEL_TICKETQ(log->l_write_headq, tic); 2655 xlog_del_ticketq(&log->l_write_headq, tic);
2585 2656
2586 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */ 2657 /* we've got enough space */
2658 xlog_grant_add_space_write(log, need_bytes);
2587#ifdef DEBUG 2659#ifdef DEBUG
2588 tail_lsn = log->l_tail_lsn; 2660 tail_lsn = log->l_tail_lsn;
2589 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { 2661 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2600,7 +2672,7 @@ redo:
2600 2672
2601 error_return: 2673 error_return:
2602 if (tic->t_flags & XLOG_TIC_IN_Q) 2674 if (tic->t_flags & XLOG_TIC_IN_Q)
2603 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2675 xlog_del_ticketq(&log->l_reserve_headq, tic);
2604 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); 2676 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
2605 /* 2677 /*
2606 * If we are failing, make sure the ticket doesn't have any 2678 * If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2633 ticket->t_cnt--; 2705 ticket->t_cnt--;
2634 2706
2635 s = GRANT_LOCK(log); 2707 s = GRANT_LOCK(log);
2636 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2708 xlog_grant_sub_space(log, ticket->t_curr_res);
2637 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2638 ticket->t_curr_res = ticket->t_unit_res; 2709 ticket->t_curr_res = ticket->t_unit_res;
2639 XLOG_TIC_RESET_RES(ticket); 2710 XLOG_TIC_RESET_RES(ticket);
2640 xlog_trace_loggrant(log, ticket, 2711 xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2647 return; 2718 return;
2648 } 2719 }
2649 2720
2650 XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r'); 2721 xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2651 xlog_trace_loggrant(log, ticket, 2722 xlog_trace_loggrant(log, ticket,
2652 "xlog_regrant_reserve_log_space: exit"); 2723 "xlog_regrant_reserve_log_space: exit");
2653 xlog_verify_grant_head(log, 0); 2724 xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t *log,
2683 s = GRANT_LOCK(log); 2754 s = GRANT_LOCK(log);
2684 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); 2755 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
2685 2756
2686 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2757 xlog_grant_sub_space(log, ticket->t_curr_res);
2687 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2688 2758
2689 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); 2759 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
2690 2760
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t *log,
2693 */ 2763 */
2694 if (ticket->t_cnt > 0) { 2764 if (ticket->t_cnt > 0) {
2695 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); 2765 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
2696 XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w'); 2766 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
2697 XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
2698 } 2767 }
2699 2768
2700 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); 2769 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index f40d4391fcfc..4b2ac88dbb83 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -96,7 +96,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
96 96
97 97
98/* Region types for iovec's i_type */ 98/* Region types for iovec's i_type */
99#if defined(XFS_LOG_RES_DEBUG)
100#define XLOG_REG_TYPE_BFORMAT 1 99#define XLOG_REG_TYPE_BFORMAT 1
101#define XLOG_REG_TYPE_BCHUNK 2 100#define XLOG_REG_TYPE_BCHUNK 2
102#define XLOG_REG_TYPE_EFI_FORMAT 3 101#define XLOG_REG_TYPE_EFI_FORMAT 3
@@ -117,21 +116,13 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
117#define XLOG_REG_TYPE_COMMIT 18 116#define XLOG_REG_TYPE_COMMIT 18
118#define XLOG_REG_TYPE_TRANSHDR 19 117#define XLOG_REG_TYPE_TRANSHDR 19
119#define XLOG_REG_TYPE_MAX 19 118#define XLOG_REG_TYPE_MAX 19
120#endif
121 119
122#if defined(XFS_LOG_RES_DEBUG)
123#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) 120#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
124#else
125#define XLOG_VEC_SET_TYPE(vecp, t)
126#endif
127
128 121
129typedef struct xfs_log_iovec { 122typedef struct xfs_log_iovec {
130 xfs_caddr_t i_addr; /* beginning address of region */ 123 xfs_caddr_t i_addr; /* beginning address of region */
131 int i_len; /* length in bytes of region */ 124 int i_len; /* length in bytes of region */
132#if defined(XFS_LOG_RES_DEBUG) 125 uint i_type; /* type of region */
133 uint i_type; /* type of region */
134#endif
135} xfs_log_iovec_t; 126} xfs_log_iovec_t;
136 127
137typedef void* xfs_log_ticket_t; 128typedef void* xfs_log_ticket_t;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4518b188ade6..34bcbf50789c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
253 253
254 254
255/* Ticket reservation region accounting */ 255/* Ticket reservation region accounting */
256#if defined(XFS_LOG_RES_DEBUG)
257#define XLOG_TIC_LEN_MAX 15 256#define XLOG_TIC_LEN_MAX 15
258#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ 257#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
259 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) 258 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
278 * we don't care about. 277 * we don't care about.
279 */ 278 */
280typedef struct xlog_res { 279typedef struct xlog_res {
281 uint r_len; 280 uint r_len; /* region length :4 */
282 uint r_type; 281 uint r_type; /* region's transaction type :4 */
283} xlog_res_t; 282} xlog_res_t;
284#else
285#define XLOG_TIC_RESET_RES(t)
286#define XLOG_TIC_ADD_OPHDR(t)
287#define XLOG_TIC_ADD_REGION(t, len, type)
288#endif
289
290 283
291typedef struct xlog_ticket { 284typedef struct xlog_ticket {
292 sv_t t_sema; /* sleep on this semaphore : 20 */ 285 sv_t t_sema; /* sleep on this semaphore : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
301 char t_flags; /* properties of reservation : 1 */ 294 char t_flags; /* properties of reservation : 1 */
302 uint t_trans_type; /* transaction type : 4 */ 295 uint t_trans_type; /* transaction type : 4 */
303 296
304#if defined (XFS_LOG_RES_DEBUG)
305 /* reservation array fields */ 297 /* reservation array fields */
306 uint t_res_num; /* num in array : 4 */ 298 uint t_res_num; /* num in array : 4 */
307 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */
308 uint t_res_num_ophdrs; /* num op hdrs : 4 */ 299 uint t_res_num_ophdrs; /* num op hdrs : 4 */
309 uint t_res_arr_sum; /* array sum : 4 */ 300 uint t_res_arr_sum; /* array sum : 4 */
310 uint t_res_o_flow; /* sum overflow : 4 */ 301 uint t_res_o_flow; /* sum overflow : 4 */
311#endif 302 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
312} xlog_ticket_t; 303} xlog_ticket_t;
313 304
314#endif 305#endif
@@ -494,71 +485,13 @@ typedef struct log {
494 485
495#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 486#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
496 487
497#define XLOG_GRANT_SUB_SPACE(log,bytes,type) \
498 { \
499 if (type == 'w') { \
500 (log)->l_grant_write_bytes -= (bytes); \
501 if ((log)->l_grant_write_bytes < 0) { \
502 (log)->l_grant_write_bytes += (log)->l_logsize; \
503 (log)->l_grant_write_cycle--; \
504 } \
505 } else { \
506 (log)->l_grant_reserve_bytes -= (bytes); \
507 if ((log)->l_grant_reserve_bytes < 0) { \
508 (log)->l_grant_reserve_bytes += (log)->l_logsize;\
509 (log)->l_grant_reserve_cycle--; \
510 } \
511 } \
512 }
513#define XLOG_GRANT_ADD_SPACE(log,bytes,type) \
514 { \
515 if (type == 'w') { \
516 (log)->l_grant_write_bytes += (bytes); \
517 if ((log)->l_grant_write_bytes > (log)->l_logsize) { \
518 (log)->l_grant_write_bytes -= (log)->l_logsize; \
519 (log)->l_grant_write_cycle++; \
520 } \
521 } else { \
522 (log)->l_grant_reserve_bytes += (bytes); \
523 if ((log)->l_grant_reserve_bytes > (log)->l_logsize) { \
524 (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
525 (log)->l_grant_reserve_cycle++; \
526 } \
527 } \
528 }
529#define XLOG_INS_TICKETQ(q, tic) \
530 { \
531 if (q) { \
532 (tic)->t_next = (q); \
533 (tic)->t_prev = (q)->t_prev; \
534 (q)->t_prev->t_next = (tic); \
535 (q)->t_prev = (tic); \
536 } else { \
537 (tic)->t_prev = (tic)->t_next = (tic); \
538 (q) = (tic); \
539 } \
540 (tic)->t_flags |= XLOG_TIC_IN_Q; \
541 }
542#define XLOG_DEL_TICKETQ(q, tic) \
543 { \
544 if ((tic) == (tic)->t_next) { \
545 (q) = NULL; \
546 } else { \
547 (q) = (tic)->t_next; \
548 (tic)->t_next->t_prev = (tic)->t_prev; \
549 (tic)->t_prev->t_next = (tic)->t_next; \
550 } \
551 (tic)->t_next = (tic)->t_prev = NULL; \
552 (tic)->t_flags &= ~XLOG_TIC_IN_Q; \
553 }
554 488
555/* common routines */ 489/* common routines */
556extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 490extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
557extern int xlog_find_tail(xlog_t *log, 491extern int xlog_find_tail(xlog_t *log,
558 xfs_daddr_t *head_blk, 492 xfs_daddr_t *head_blk,
559 xfs_daddr_t *tail_blk, 493 xfs_daddr_t *tail_blk);
560 int readonly); 494extern int xlog_recover(xlog_t *log);
561extern int xlog_recover(xlog_t *log, int readonly);
562extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); 495extern int xlog_recover_finish(xlog_t *log, int mfsi_flags);
563extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 496extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
564extern void xlog_recover_process_iunlinks(xlog_t *log); 497extern void xlog_recover_process_iunlinks(xlog_t *log);
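With XFS_LOG_RES_DEBUG gone, every xlog_ticket now carries the xlog_res_t region array unconditionally, and XLOG_TIC_RESET_RES zeroes the counters whenever a permanent reservation is re-used. XLOG_TIC_ADD_REGION itself is not visible in this hunk, so the recorder below is only an assumption about its shape: append (length, type) pairs until the fixed array fills, then count the rest as overflow so xlog_print_tic_res can still report totals.

    #include <stdio.h>

    #define TIC_LEN_MAX 15                 /* mirrors XLOG_TIC_LEN_MAX */

    struct res { unsigned r_len, r_type; };

    struct ticket {
            unsigned   t_res_num;          /* regions recorded            */
            unsigned   t_res_arr_sum;      /* bytes in recorded regions   */
            unsigned   t_res_o_flow;       /* bytes after the array fills */
            struct res t_res_arr[TIC_LEN_MAX];
    };

    /* Hypothetical recorder in the spirit of XLOG_TIC_ADD_REGION. */
    static void tic_add_region(struct ticket *t, unsigned len, unsigned type)
    {
            if (t->t_res_num < TIC_LEN_MAX) {
                    t->t_res_arr[t->t_res_num].r_len  = len;
                    t->t_res_arr[t->t_res_num].r_type = type;
                    t->t_res_arr_sum += len;
                    t->t_res_num++;
            } else {
                    t->t_res_o_flow += len;
            }
    }

    int main(void)
    {
            struct ticket t = { 0 };
            unsigned i;

            for (i = 0; i < 20; i++)
                    tic_add_region(&t, 64, 1);   /* e.g. BFORMAT regions */
            printf("num %u sum %u overflow %u\n",
                   t.t_res_num, t.t_res_arr_sum, t.t_res_o_flow);
            return 0;
    }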
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ab7df768063..7d46cbd6a07a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -783,8 +783,7 @@ int
783xlog_find_tail( 783xlog_find_tail(
784 xlog_t *log, 784 xlog_t *log,
785 xfs_daddr_t *head_blk, 785 xfs_daddr_t *head_blk,
786 xfs_daddr_t *tail_blk, 786 xfs_daddr_t *tail_blk)
787 int readonly)
788{ 787{
789 xlog_rec_header_t *rhead; 788 xlog_rec_header_t *rhead;
790 xlog_op_header_t *op_head; 789 xlog_op_header_t *op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
2563 2562
2564 /* 2563 /*
2565 * The logitem format's flag tells us if this was user quotaoff, 2564 * The logitem format's flag tells us if this was user quotaoff,
2566 * group quotaoff or both. 2565 * group/project quotaoff or both.
2567 */ 2566 */
2568 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2567 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
2569 log->l_quotaoffs_flag |= XFS_DQ_USER; 2568 log->l_quotaoffs_flag |= XFS_DQ_USER;
2569 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
2570 log->l_quotaoffs_flag |= XFS_DQ_PROJ;
2570 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2571 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2571 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2572 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2572 2573
@@ -3890,14 +3891,13 @@ xlog_do_recover(
3890 */ 3891 */
3891int 3892int
3892xlog_recover( 3893xlog_recover(
3893 xlog_t *log, 3894 xlog_t *log)
3894 int readonly)
3895{ 3895{
3896 xfs_daddr_t head_blk, tail_blk; 3896 xfs_daddr_t head_blk, tail_blk;
3897 int error; 3897 int error;
3898 3898
3899 /* find the tail of the log */ 3899 /* find the tail of the log */
3900 if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly))) 3900 if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
3901 return error; 3901 return error;
3902 3902
3903 if (tail_blk != head_blk) { 3903 if (tail_blk != head_blk) {
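The quotaoff recovery hunk above folds the logitem's accounting flags into the log's l_quotaoffs_flag summary, now covering project quota alongside user and group. The mapping is a straight bit translation; a tiny sketch (flag values are stand-ins for the real XFS constants):

    #include <stdio.h>

    #define UQUOTA_ACCT (1u << 0)    /* stand-ins for XFS_*QUOTA_ACCT */
    #define PQUOTA_ACCT (1u << 1)
    #define GQUOTA_ACCT (1u << 2)

    #define DQ_USER  (1u << 0)       /* stand-ins for XFS_DQ_*        */
    #define DQ_PROJ  (1u << 1)
    #define DQ_GROUP (1u << 2)

    static unsigned quotaoffs_flag(unsigned qf_flags)
    {
            unsigned out = 0;

            if (qf_flags & UQUOTA_ACCT)
                    out |= DQ_USER;
            if (qf_flags & PQUOTA_ACCT)
                    out |= DQ_PROJ;
            if (qf_flags & GQUOTA_ACCT)
                    out |= DQ_GROUP;
            return out;
    }

    int main(void)
    {
            /* prints 0x3: user and project quotaoff both recorded */
            printf("%#x\n", quotaoffs_flag(UQUOTA_ACCT | PQUOTA_ACCT));
            return 0;
    }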
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 303af86739bf..6088e14f84e3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,7 @@ STATIC int xfs_uuid_mount(xfs_mount_t *);
51STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 51STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
52STATIC void xfs_unmountfs_wait(xfs_mount_t *); 52STATIC void xfs_unmountfs_wait(xfs_mount_t *);
53 53
54static struct { 54static const struct {
55 short offset; 55 short offset;
56 short type; /* 0 = integer 56 short type; /* 0 = integer
57 * 1 = binary / string (no translation) 57 * 1 = binary / string (no translation)
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1077 1077
1078 xfs_iflush_all(mp); 1078 xfs_iflush_all(mp);
1079 1079
1080 XFS_QM_DQPURGEALL(mp, 1080 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1081 XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
1082 1081
1083 /* 1082 /*
1084 * Flush out the log synchronously so that we know for sure 1083 * Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3432fd5a3986..cd3cf9613a00 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -308,7 +308,6 @@ typedef struct xfs_mount {
308 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 308 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
309 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 309 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
310 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 310 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
311#define m_dev m_ddev_targp->pbr_dev
312 __uint8_t m_dircook_elog; /* log d-cookie entry bits */ 311 __uint8_t m_dircook_elog; /* log d-cookie entry bits */
313 __uint8_t m_blkbit_log; /* blocklog + NBBY */ 312 __uint8_t m_blkbit_log; /* blocklog + NBBY */
314 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 313 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
393 user */ 392 user */
394#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment 393#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment
395 allocations */ 394 allocations */
396#define XFS_MOUNT_COMPAT_ATTR (1ULL << 8) /* do not use attr2 format */ 395#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
397 /* (1ULL << 9) -- currently unused */ 396 /* (1ULL << 9) -- currently unused */
398#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 397#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
399#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */ 398#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4d4e8f4e768e..81a05cfd77d2 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -243,7 +243,6 @@ xfs_rename(
243 xfs_inode_t *inodes[4]; 243 xfs_inode_t *inodes[4];
244 int target_ip_dropped = 0; /* dropped target_ip link? */ 244 int target_ip_dropped = 0; /* dropped target_ip link? */
245 vnode_t *src_dir_vp; 245 vnode_t *src_dir_vp;
246 bhv_desc_t *target_dir_bdp;
247 int spaceres; 246 int spaceres;
248 int target_link_zero = 0; 247 int target_link_zero = 0;
249 int num_inodes; 248 int num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
260 * Find the XFS behavior descriptor for the target directory 259 * Find the XFS behavior descriptor for the target directory
261 * vnode since it was not handed to us. 260 * vnode since it was not handed to us.
262 */ 261 */
263 target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp), 262 target_dp = xfs_vtoi(target_dir_vp);
264 &xfs_vnodeops); 263 if (target_dp == NULL) {
265 if (target_dir_bdp == NULL) {
266 return XFS_ERROR(EXDEV); 264 return XFS_ERROR(EXDEV);
267 } 265 }
268 266
269 src_dp = XFS_BHVTOI(src_dir_bdp); 267 src_dp = XFS_BHVTOI(src_dir_bdp);
270 target_dp = XFS_BHVTOI(target_dir_bdp);
271 mp = src_dp->i_mount; 268 mp = src_dp->i_mount;
272 269
273 if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || 270 if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index c4b20872f07d..a59c102cf214 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -238,6 +238,7 @@ xfs_bioerror_relse(
238 } 238 }
239 return (EIO); 239 return (EIO);
240} 240}
241
241/* 242/*
242 * Prints out an ALERT message about I/O error. 243 * Prints out an ALERT message about I/O error.
243 */ 244 */
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
252 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" 253 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
253 " (\"%s\") error %d buf count %zd", 254 " (\"%s\") error %d buf count %zd",
254 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, 255 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
255 XFS_BUFTARG_NAME(bp->pb_target), 256 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
256 (__uint64_t)blkno, 257 (__uint64_t)blkno, func,
257 func, 258 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
258 XFS_BUF_GETERROR(bp),
259 XFS_BUF_COUNT(bp));
260} 259}
261 260
262/* 261/*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 4a17d335f897..bf168a91ddb8 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -68,18 +68,6 @@ struct xfs_mount;
68 (XFS_SB_VERSION_NUMBITS | \ 68 (XFS_SB_VERSION_NUMBITS | \
69 XFS_SB_VERSION_OKREALFBITS | \ 69 XFS_SB_VERSION_OKREALFBITS | \
70 XFS_SB_VERSION_OKSASHFBITS) 70 XFS_SB_VERSION_OKSASHFBITS)
71#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits) \
72 (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
73 (morebits)) ? \
74 (XFS_SB_VERSION_4 | \
75 ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
76 ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
77 ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
78 ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
79 ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
80 ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
81 ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
82 XFS_SB_VERSION_1)
83 71
84/* 72/*
85 * There are two words to hold XFS "feature" bits: the original 73 * There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
105 (XFS_SB_VERSION2_OKREALFBITS | \ 93 (XFS_SB_VERSION2_OKREALFBITS | \
106 XFS_SB_VERSION2_OKSASHFBITS ) 94 XFS_SB_VERSION2_OKSASHFBITS )
107 95
108/*
109 * mkfs macro to set up sb_features2 word
110 */
111#define XFS_SB_VERSION2_MKFS(resvd1, sbcntr) 0
112
113typedef struct xfs_sb 96typedef struct xfs_sb
114{ 97{
115 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ 98 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 279e043d7323..d3d714e6b32a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
1014 xfs_log_item_t *lip; 1014 xfs_log_item_t *lip;
1015 int i; 1015 int i;
1016#endif 1016#endif
1017 xfs_mount_t *mp = tp->t_mountp;
1017 1018
1018 /* 1019 /*
1019 * See if the caller is being too lazy to figure out if 1020 * See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
1026 * filesystem. This happens in paths where we detect 1027 * filesystem. This happens in paths where we detect
1027 * corruption and decide to give up. 1028 * corruption and decide to give up.
1028 */ 1029 */
1029 if ((tp->t_flags & XFS_TRANS_DIRTY) && 1030 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
1030 !XFS_FORCED_SHUTDOWN(tp->t_mountp)) 1031 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
1031 xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE); 1032 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
1033 }
1032#ifdef DEBUG 1034#ifdef DEBUG
1033 if (!(flags & XFS_TRANS_ABORT)) { 1035 if (!(flags & XFS_TRANS_ABORT)) {
1034 licp = &(tp->t_items); 1036 licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
1040 } 1042 }
1041 1043
1042 lip = lidp->lid_item; 1044 lip = lidp->lid_item;
1043 if (!XFS_FORCED_SHUTDOWN(tp->t_mountp)) 1045 if (!XFS_FORCED_SHUTDOWN(mp))
1044 ASSERT(!(lip->li_type == XFS_LI_EFD)); 1046 ASSERT(!(lip->li_type == XFS_LI_EFD));
1045 } 1047 }
1046 licp = licp->lic_next; 1048 licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
1048 } 1050 }
1049#endif 1051#endif
1050 xfs_trans_unreserve_and_mod_sb(tp); 1052 xfs_trans_unreserve_and_mod_sb(tp);
1051 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp); 1053 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
1052 1054
1053 if (tp->t_ticket) { 1055 if (tp->t_ticket) {
1054 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1056 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
1057 } else { 1059 } else {
1058 log_flags = 0; 1060 log_flags = 0;
1059 } 1061 }
1060 xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); 1062 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1061 } 1063 }
1062 1064
1063 /* mark this thread as no longer being in a transaction */ 1065 /* mark this thread as no longer being in a transaction */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a889963fdd14..d77901c07f63 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -973,7 +973,6 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
973void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); 973void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
974void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); 974void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
975void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 975void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
976void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
977void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 976void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
978void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 977void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
979void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 978void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fefe1d60377f..34654ec6ae10 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -55,16 +55,13 @@ xfs_get_dir_entry(
55 xfs_inode_t **ipp) 55 xfs_inode_t **ipp)
56{ 56{
57 vnode_t *vp; 57 vnode_t *vp;
58 bhv_desc_t *bdp;
59 58
60 vp = VNAME_TO_VNODE(dentry); 59 vp = VNAME_TO_VNODE(dentry);
61 bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); 60
62 if (!bdp) { 61 *ipp = xfs_vtoi(vp);
63 *ipp = NULL; 62 if (!*ipp)
64 return XFS_ERROR(ENOENT); 63 return XFS_ERROR(ENOENT);
65 }
66 VN_HOLD(vp); 64 VN_HOLD(vp);
67 *ipp = XFS_BHVTOI(bdp);
68 return 0; 65 return 0;
69} 66}
70 67
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7bdbd991ab1c..b6ad370fab3d 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -53,6 +53,7 @@
53#include "xfs_acl.h" 53#include "xfs_acl.h"
54#include "xfs_attr.h" 54#include "xfs_attr.h"
55#include "xfs_clnt.h" 55#include "xfs_clnt.h"
56#include "xfs_fsops.h"
56 57
57STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); 58STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
58 59
@@ -290,8 +291,8 @@ xfs_start_flags(
290 mp->m_flags |= XFS_MOUNT_IDELETE; 291 mp->m_flags |= XFS_MOUNT_IDELETE;
291 if (ap->flags & XFSMNT_DIRSYNC) 292 if (ap->flags & XFSMNT_DIRSYNC)
292 mp->m_flags |= XFS_MOUNT_DIRSYNC; 293 mp->m_flags |= XFS_MOUNT_DIRSYNC;
293 if (ap->flags & XFSMNT_COMPAT_ATTR) 294 if (ap->flags & XFSMNT_ATTR2)
294 mp->m_flags |= XFS_MOUNT_COMPAT_ATTR; 295 mp->m_flags |= XFS_MOUNT_ATTR2;
295 296
296 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) 297 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
297 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 298 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
312 mp->m_flags |= XFS_MOUNT_NOUUID; 313 mp->m_flags |= XFS_MOUNT_NOUUID;
313 if (ap->flags & XFSMNT_BARRIER) 314 if (ap->flags & XFSMNT_BARRIER)
314 mp->m_flags |= XFS_MOUNT_BARRIER; 315 mp->m_flags |= XFS_MOUNT_BARRIER;
316 else
317 mp->m_flags &= ~XFS_MOUNT_BARRIER;
315 318
316 return 0; 319 return 0;
317} 320}
@@ -330,10 +333,11 @@ xfs_finish_flags(
330 333
331 /* Fail a mount where the logbuf is smaller than the log stripe */ 334
332 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { 335 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
333 if ((ap->logbufsize == -1) && 336 if ((ap->logbufsize <= 0) &&
334 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { 337 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
335 mp->m_logbsize = mp->m_sb.sb_logsunit; 338 mp->m_logbsize = mp->m_sb.sb_logsunit;
336 } else if (ap->logbufsize < mp->m_sb.sb_logsunit) { 339 } else if (ap->logbufsize > 0 &&
340 ap->logbufsize < mp->m_sb.sb_logsunit) {
337 cmn_err(CE_WARN, 341 cmn_err(CE_WARN,
338 "XFS: logbuf size must be greater than or equal to log stripe size"); 342 "XFS: logbuf size must be greater than or equal to log stripe size");
339 return XFS_ERROR(EINVAL); 343 return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
347 } 351 }
348 } 352 }
349 353
354 if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
355 mp->m_flags |= XFS_MOUNT_ATTR2;
356 }
357
350 /* 358 /*
351 * prohibit r/w mounts of read-only filesystems 359 * prohibit r/w mounts of read-only filesystems
352 */ 360 */
@@ -382,10 +390,6 @@ xfs_finish_flags(
382 return XFS_ERROR(EINVAL); 390 return XFS_ERROR(EINVAL);
383 } 391 }
384 392
385 if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
386 mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
387 }
388
389 return 0; 393 return 0;
390} 394}
391 395
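The reworked check in xfs_finish_flags treats any non-positive logbufsize as "not set" and promotes it to the log stripe unit, while an explicitly requested size smaller than the stripe unit is rejected. A compact sketch of that decision, with big_record_bsize standing in for XLOG_BIG_RECORD_BSIZE:

    #include <stdio.h>

    /* Return the log buffer size to use, or -1 for EINVAL. */
    static int pick_logbsize(int requested, int logsunit,
                             int big_record_bsize)
    {
            if (requested <= 0 && logsunit > big_record_bsize)
                    return logsunit;   /* unset: follow stripe unit */
            if (requested > 0 && requested < logsunit)
                    return -1;         /* too small for the stripe  */
            return requested;
    }

    int main(void)
    {
            printf("%d\n", pick_logbsize(0, 65536, 32768));      /* 65536  */
            printf("%d\n", pick_logbsize(32768, 65536, 32768));  /* -1     */
            printf("%d\n", pick_logbsize(131072, 65536, 32768)); /* 131072 */
            return 0;
    }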
@@ -504,13 +508,13 @@ xfs_mount(
504 if (error) 508 if (error)
505 goto error2; 509 goto error2;
506 510
511 if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
512 xfs_mountfs_check_barriers(mp);
513
507 error = XFS_IOINIT(vfsp, args, flags); 514 error = XFS_IOINIT(vfsp, args, flags);
508 if (error) 515 if (error)
509 goto error2; 516 goto error2;
510 517
511 if ((args->flags & XFSMNT_BARRIER) &&
512 !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
513 xfs_mountfs_check_barriers(mp);
514 return 0; 518 return 0;
515 519
516error2: 520error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
655 else 659 else
656 mp->m_flags &= ~XFS_MOUNT_NOATIME; 660 mp->m_flags &= ~XFS_MOUNT_NOATIME;
657 661
662 if (args->flags & XFSMNT_BARRIER)
663 mp->m_flags |= XFS_MOUNT_BARRIER;
664 else
665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
666
658 if ((vfsp->vfs_flag & VFS_RDONLY) && 667 if ((vfsp->vfs_flag & VFS_RDONLY) &&
659 !(*flags & MS_RDONLY)) { 668 !(*flags & MS_RDONLY)) {
660 vfsp->vfs_flag &= ~VFS_RDONLY; 669 vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
1634#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1643#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
1635#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 1644#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
1636 * unwritten extent conversion */ 1645 * unwritten extent conversion */
1646#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
1637#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ 1647#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
1638#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 1648#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
1639#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 1649#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
1680 int iosize; 1690 int iosize;
1681 1691
1682 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 1692 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1683 args->flags |= XFSMNT_COMPAT_ATTR;
1684 1693
1685#if 0 /* XXX: off by default, until some remaining issues ironed out */ 1694#if 0 /* XXX: off by default, until some remaining issues ironed out */
1686 args->flags |= XFSMNT_IDELETE; /* default to on */ 1695 args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
1806 args->flags |= XFSMNT_NOUUID; 1815 args->flags |= XFSMNT_NOUUID;
1807 } else if (!strcmp(this_char, MNTOPT_BARRIER)) { 1816 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
1808 args->flags |= XFSMNT_BARRIER; 1817 args->flags |= XFSMNT_BARRIER;
1818 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
1819 args->flags &= ~XFSMNT_BARRIER;
1809 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 1820 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
1810 args->flags &= ~XFSMNT_IDELETE; 1821 args->flags &= ~XFSMNT_IDELETE;
1811 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 1822 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
1815 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { 1826 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
1816 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 1827 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1817 } else if (!strcmp(this_char, MNTOPT_ATTR2)) { 1828 } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
1818 args->flags &= ~XFSMNT_COMPAT_ATTR; 1829 args->flags |= XFSMNT_ATTR2;
1819 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 1830 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
1820 args->flags |= XFSMNT_COMPAT_ATTR; 1831 args->flags &= ~XFSMNT_ATTR2;
1821 } else if (!strcmp(this_char, "osyncisdsync")) { 1832 } else if (!strcmp(this_char, "osyncisdsync")) {
1822 /* no-op, this is now the default */ 1833 /* no-op, this is now the default */
1823printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); 1834printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
1892 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 1903 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
1893 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 1904 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
1894 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, 1905 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
1895 { XFS_MOUNT_BARRIER, "," MNTOPT_BARRIER },
1896 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP }, 1906 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP },
1897 { 0, NULL } 1907 { 0, NULL }
1898 }; 1908 };
@@ -1914,33 +1924,28 @@ xfs_showargs(
1914 1924
1915 if (mp->m_logbufs > 0) 1925 if (mp->m_logbufs > 0)
1916 seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); 1926 seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
1917
1918 if (mp->m_logbsize > 0) 1927 if (mp->m_logbsize > 0)
1919 seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); 1928 seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
1920 1929
1921 if (mp->m_logname) 1930 if (mp->m_logname)
1922 seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); 1931 seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
1923
1924 if (mp->m_rtname) 1932 if (mp->m_rtname)
1925 seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); 1933 seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
1926 1934
1927 if (mp->m_dalign > 0) 1935 if (mp->m_dalign > 0)
1928 seq_printf(m, "," MNTOPT_SUNIT "=%d", 1936 seq_printf(m, "," MNTOPT_SUNIT "=%d",
1929 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 1937 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
1930
1931 if (mp->m_swidth > 0) 1938 if (mp->m_swidth > 0)
1932 seq_printf(m, "," MNTOPT_SWIDTH "=%d", 1939 seq_printf(m, "," MNTOPT_SWIDTH "=%d",
1933 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 1940 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
1934 1941
1935 if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
1936 seq_printf(m, "," MNTOPT_ATTR2);
1937
1938 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) 1942 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
1939 seq_printf(m, "," MNTOPT_LARGEIO); 1943 seq_printf(m, "," MNTOPT_LARGEIO);
1944 if (mp->m_flags & XFS_MOUNT_BARRIER)
1945 seq_printf(m, "," MNTOPT_BARRIER);
1940 1946
1941 if (!(vfsp->vfs_flag & VFS_32BITINODES)) 1947 if (!(vfsp->vfs_flag & VFS_32BITINODES))
1942 seq_printf(m, "," MNTOPT_64BITINODE); 1948 seq_printf(m, "," MNTOPT_64BITINODE);
1943
1944 if (vfsp->vfs_flag & VFS_GRPID) 1949 if (vfsp->vfs_flag & VFS_GRPID)
1945 seq_printf(m, "," MNTOPT_GRPID); 1950 seq_printf(m, "," MNTOPT_GRPID);
1946 1951
@@ -1959,6 +1964,7 @@ xfs_freeze(
1959 /* Push the superblock and write an unmount record */ 1964 /* Push the superblock and write an unmount record */
1960 xfs_log_unmount_write(mp); 1965 xfs_log_unmount_write(mp);
1961 xfs_unmountfs_writesb(mp); 1966 xfs_unmountfs_writesb(mp);
1967 xfs_fs_log_dummy(mp);
1962} 1968}
1963 1969
1964 1970
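xfs_parseargs gains the "nobarrier" counterpart to "barrier", and xfs_mntupdate re-applies the pair on remount, so each option token simply sets or clears one mount flag. A minimal sketch of that strcmp-driven toggle (the flag word and option names mirror the diff; everything else is a stand-in):

    #include <stdio.h>
    #include <string.h>

    #define MNT_BARRIER (1u << 0)

    static void parse_one(const char *opt, unsigned *flags)
    {
            if (!strcmp(opt, "barrier"))
                    *flags |= MNT_BARRIER;    /* enable write barriers */
            else if (!strcmp(opt, "nobarrier"))
                    *flags &= ~MNT_BARRIER;   /* explicitly disable    */
    }

    int main(void)
    {
            unsigned flags = 0;

            parse_one("barrier", &flags);
            parse_one("nobarrier", &flags);   /* last option wins */
            printf("barrier? %u\n", flags & MNT_BARRIER);  /* 0 */
            return 0;
    }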
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e92cacde02f5..8076cc981e11 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -185,8 +185,7 @@ xfs_getattr(
185 break; 185 break;
186 } 186 }
187 187
188 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; 188 vn_atime_to_timespec(vp, &vap->va_atime);
189 vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
190 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 189 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
191 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 190 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
192 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 191 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -544,24 +543,6 @@ xfs_setattr(
544 } 543 }
545 544
546 /* 545 /*
547 * Can't set extent size unless the file is marked, or
548 * about to be marked as a realtime file.
549 *
550 * This check will be removed when fixed size extents
551 * with buffered data writes is implemented.
552 *
553 */
554 if ((mask & XFS_AT_EXTSIZE) &&
555 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
556 vap->va_extsize) &&
557 (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
558 ((mask & XFS_AT_XFLAGS) &&
559 (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
560 code = XFS_ERROR(EINVAL);
561 goto error_return;
562 }
563
564 /*
565 * Can't change realtime flag if any extents are allocated. 546 * Can't change realtime flag if any extents are allocated.
566 */ 547 */
567 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 548 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -823,13 +804,17 @@ xfs_setattr(
823 di_flags |= XFS_DIFLAG_RTINHERIT; 804 di_flags |= XFS_DIFLAG_RTINHERIT;
824 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 805 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
825 di_flags |= XFS_DIFLAG_NOSYMLINKS; 806 di_flags |= XFS_DIFLAG_NOSYMLINKS;
826 } else { 807 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
808 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
809 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
827 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 810 if (vap->va_xflags & XFS_XFLAG_REALTIME) {
828 di_flags |= XFS_DIFLAG_REALTIME; 811 di_flags |= XFS_DIFLAG_REALTIME;
829 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 812 ip->i_iocore.io_flags |= XFS_IOCORE_RT;
830 } else { 813 } else {
831 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 814 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
832 } 815 }
816 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
817 di_flags |= XFS_DIFLAG_EXTSIZE;
833 } 818 }
834 ip->i_d.di_flags = di_flags; 819 ip->i_d.di_flags = di_flags;
835 } 820 }
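The di_flags hunk above restricts flag inheritance by file type: the *INHERIT bits only make sense on directories, while REALTIME and the new EXTSIZE bit apply to regular files. A sketch of that translation from user-visible xflags to on-disk di_flags (bit values are stand-ins for the real constants):

    #include <stdio.h>

    #define XFLAG_REALTIME      (1u << 0)   /* stand-in bit values */
    #define XFLAG_RTINHERIT     (1u << 1)
    #define XFLAG_EXTSIZE       (1u << 2)
    #define XFLAG_EXTSZINHERIT  (1u << 3)

    #define DIFLAG_REALTIME     (1u << 0)
    #define DIFLAG_RTINHERIT    (1u << 1)
    #define DIFLAG_EXTSIZE      (1u << 2)
    #define DIFLAG_EXTSZINHERIT (1u << 3)

    static unsigned xflags_to_diflags(unsigned xflags, int is_dir)
    {
            unsigned di = 0;

            if (is_dir) {                       /* inheritable bits */
                    if (xflags & XFLAG_RTINHERIT)
                            di |= DIFLAG_RTINHERIT;
                    if (xflags & XFLAG_EXTSZINHERIT)
                            di |= DIFLAG_EXTSZINHERIT;
            } else {                            /* regular files    */
                    if (xflags & XFLAG_REALTIME)
                            di |= DIFLAG_REALTIME;
                    if (xflags & XFLAG_EXTSIZE)
                            di |= DIFLAG_EXTSIZE;
            }
            return di;
    }

    int main(void)
    {
            printf("dir:  %#x\n", xflags_to_diflags(XFLAG_EXTSZINHERIT, 1));
            printf("file: %#x\n", xflags_to_diflags(XFLAG_EXTSIZE, 0));
            return 0;
    }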
@@ -999,10 +984,6 @@ xfs_readlink(
999 goto error_return; 984 goto error_return;
1000 } 985 }
1001 986
1002 if (!(ioflags & IO_INVIS)) {
1003 xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
1004 }
1005
1006 /* 987 /*
1007 * See if the symlink is stored inline. 988 * See if the symlink is stored inline.
1008 */ 989 */
@@ -1234,7 +1215,8 @@ xfs_inactive_free_eofblocks(
1234 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1215 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1235 1216
1236 if (!error && (nimaps != 0) && 1217 if (!error && (nimaps != 0) &&
1237 (imap.br_startblock != HOLESTARTBLOCK)) { 1218 (imap.br_startblock != HOLESTARTBLOCK ||
1219 ip->i_delayed_blks)) {
1238 /* 1220 /*
1239 * Attach the dquots to the inode up front. 1221 * Attach the dquots to the inode up front.
1240 */ 1222 */
@@ -1569,9 +1551,11 @@ xfs_release(
1569 1551
1570 if (ip->i_d.di_nlink != 0) { 1552 if (ip->i_d.di_nlink != 0) {
1571 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1553 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1572 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && 1554 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
1555 ip->i_delayed_blks > 0)) &&
1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1556 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1574 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) { 1557 (!(ip->i_d.di_flags &
1558 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1575 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1559 if ((error = xfs_inactive_free_eofblocks(mp, ip)))
1576 return (error); 1560 return (error);
1577 /* Update linux inode block count after free above */ 1561 /* Update linux inode block count after free above */
@@ -1628,7 +1612,8 @@ xfs_inactive(
 	 * only one with a reference to the inode.
 	 */
 	truncate = ((ip->i_d.di_nlink == 0) &&
-	    ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
+	    ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
+	     (ip->i_delayed_blks > 0)) &&
 	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
 
 	mp = ip->i_mount;
@@ -1646,10 +1631,12 @@ xfs_inactive(
 
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
-		     (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
-		    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
-		     (ip->i_delayed_blks != 0))) {
+		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+		       ip->i_delayed_blks > 0)) &&
+		     (ip->i_df.if_flags & XFS_IFEXTENTS) &&
+		     (!(ip->i_d.di_flags &
+				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+		      (ip->i_delayed_blks != 0)))) {
 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
 				return (VN_INACTIVE_CACHE);
 			/* Update linux inode block count after free above */
@@ -2593,7 +2580,6 @@ xfs_link(
 	int			cancel_flags;
 	int			committed;
 	vnode_t			*target_dir_vp;
-	bhv_desc_t		*src_bdp;
 	int			resblks;
 	char			*target_name = VNAME(dentry);
 	int			target_namelen;
@@ -2606,8 +2592,7 @@ xfs_link(
 	if (VN_ISDIR(src_vp))
 		return XFS_ERROR(EPERM);
 
-	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
-	sip = XFS_BHVTOI(src_bdp);
+	sip = xfs_vtoi(src_vp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
 	mp = tdp->i_mount;
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -3240,7 +3225,6 @@ xfs_readdir(
 	xfs_trans_t	*tp = NULL;
 	int		error = 0;
 	uint		lock_mode;
-	xfs_off_t	start_offset;
 
 	vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
 					   (inst_t *)__return_address);
@@ -3251,11 +3235,7 @@ xfs_readdir(
 	}
 
 	lock_mode = xfs_ilock_map_shared(dp);
-	start_offset = uiop->uio_offset;
 	error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
-	if (start_offset != uiop->uio_offset) {
-		xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
-	}
 	xfs_iunlock_map_shared(dp, lock_mode);
 	return error;
 }
@@ -3832,7 +3812,12 @@ xfs_reclaim(
 	vn_iowait(vp);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-	ASSERT(VN_CACHED(vp) == 0);
+
+	/*
+	 * Make sure the atime in the XFS inode is correct before freeing the
+	 * Linux inode.
+	 */
+	xfs_synchronize_atime(ip);
 
 	/* If we have nothing to flush with this inode then complete the
 	 * teardown now, otherwise break the link between the xfs inode
@@ -4002,42 +3987,36 @@ xfs_alloc_file_space(
 	int			alloc_type,
 	int			attr_flags)
 {
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_off_t		count;
 	xfs_filblks_t		allocated_fsb;
 	xfs_filblks_t		allocatesize_fsb;
-	int			committed;
-	xfs_off_t		count;
-	xfs_filblks_t		datablocks;
-	int			error;
+	xfs_extlen_t		extsz, temp;
+	xfs_fileoff_t		startoffset_fsb;
 	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		*imapp;
-	xfs_bmbt_irec_t		imaps[1];
-	xfs_mount_t		*mp;
-	int			numrtextents;
-	int			reccount;
-	uint			resblks;
+	int			nimaps;
+	int			bmapi_flag;
+	int			quota_flag;
 	int			rt;
-	int			rtextsize;
-	xfs_fileoff_t		startoffset_fsb;
 	xfs_trans_t		*tp;
-	int			xfs_bmapi_flags;
+	xfs_bmbt_irec_t		imaps[1], *imapp;
+	xfs_bmap_free_t		free_list;
+	uint			qblocks, resblks, resrtextents;
+	int			committed;
+	int			error;
 
 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
-	mp = ip->i_mount;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	/*
-	 * determine if this is a realtime file
-	 */
-	if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
-		if (ip->i_d.di_extsize)
-			rtextsize = ip->i_d.di_extsize;
-		else
-			rtextsize = mp->m_sb.sb_rextsize;
-	} else
-		rtextsize = 0;
+	rt = XFS_IS_REALTIME_INODE(ip);
+	if (unlikely(rt)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
+	}
 
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
@@ -4048,8 +4027,8 @@ xfs_alloc_file_space(
 	count = len;
 	error = 0;
 	imapp = &imaps[0];
-	reccount = 1;
-	xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+	nimaps = 1;
+	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
 	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
 	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 
@@ -4070,43 +4049,51 @@ xfs_alloc_file_space(
 	}
 
 	/*
-	 * allocate file space until done or until there is an error
+	 * Allocate file space until done or until there is an error
 	 */
 retry:
 	while (allocatesize_fsb && !error) {
+		xfs_fileoff_t	s, e;
+
 		/*
-		 * determine if reserving space on
-		 * the data or realtime partition.
+		 * Determine space reservations for data/realtime.
 		 */
-		if (rt) {
-			xfs_fileoff_t s, e;
-
+		if (unlikely(extsz)) {
 			s = startoffset_fsb;
-			do_div(s, rtextsize);
-			s *= rtextsize;
-			e = roundup_64(startoffset_fsb + allocatesize_fsb,
-				rtextsize);
-			numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
-			datablocks = 0;
+			do_div(s, extsz);
+			s *= extsz;
+			e = startoffset_fsb + allocatesize_fsb;
+			if ((temp = do_mod(startoffset_fsb, extsz)))
+				e += temp;
+			if ((temp = do_mod(e, extsz)))
+				e += extsz - temp;
+		} else {
+			s = 0;
+			e = allocatesize_fsb;
+		}
+
+		if (unlikely(rt)) {
+			resrtextents = qblocks = (uint)(e - s);
+			resrtextents /= mp->m_sb.sb_rextsize;
+			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+			quota_flag = XFS_QMOPT_RES_RTBLKS;
 		} else {
-			datablocks = allocatesize_fsb;
-			numrtextents = 0;
+			resrtextents = 0;
+			resblks = qblocks = \
+				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+			quota_flag = XFS_QMOPT_RES_REGBLKS;
 		}
 
 		/*
-		 * allocate and setup the transaction
+		 * Allocate and setup the transaction.
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
-		error = xfs_trans_reserve(tp,
-					  resblks,
-					  XFS_WRITE_LOG_RES(mp),
-					  numrtextents,
+		error = xfs_trans_reserve(tp, resblks,
+					  XFS_WRITE_LOG_RES(mp), resrtextents,
 					  XFS_TRANS_PERM_LOG_RES,
 					  XFS_WRITE_LOG_COUNT);
-
 		/*
-		 * check for running out of space
+		 * Check for running out of space
 		 */
 		if (error) {
 			/*
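
The reservation hunk above generalizes the old realtime-only roundup_64() logic: whenever an extent size hint is in force, the requested range is widened down and up to hint boundaries before sizing the block and quota reservations. A minimal userspace sketch of the arithmetic follows; do_div()/do_mod() are modeled with plain 64-bit division, and resv_blocks() is a hypothetical name. Note it can round past the strict end boundary, mirroring the hunk; over-reserving is safe since unused reservation is returned when the transaction commits:

/*
 * Sketch only: userspace model of the reservation sizing above.
 * resv_blocks() is a hypothetical helper, not a kernel function.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t resv_blocks(uint64_t start_fsb, uint64_t len_fsb,
			    uint64_t extsz)
{
	uint64_t s, e, temp;

	if (extsz) {
		s = start_fsb - start_fsb % extsz;	/* round start down */
		e = start_fsb + len_fsb;
		if ((temp = start_fsb % extsz))		/* start misalignment */
			e += temp;
		if ((temp = e % extsz))			/* round end up */
			e += extsz - temp;
	} else {
		s = 0;
		e = len_fsb;
	}
	return e - s;
}

int main(void)
{
	/* 9 blocks starting at block 5 with a 16-block extent size hint. */
	printf("%llu\n", (unsigned long long)resv_blocks(5, 9, 16));
	return 0;
}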
@@ -4117,8 +4104,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+						      qblocks, 0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -4126,19 +4113,19 @@ retry:
 		xfs_trans_ihold(tp, ip);
 
 		/*
-		 * issue the bmapi() call to allocate the blocks
+		 * Issue the xfs_bmapi() call to allocate the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		error = xfs_bmapi(tp, ip, startoffset_fsb,
-				  allocatesize_fsb, xfs_bmapi_flags,
-				  &firstfsb, 0, imapp, &reccount,
+				  allocatesize_fsb, bmapi_flag,
+				  &firstfsb, 0, imapp, &nimaps,
 				  &free_list);
 		if (error) {
 			goto error0;
 		}
 
 		/*
-		 * complete the transaction
+		 * Complete the transaction
 		 */
 		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
 		if (error) {
@@ -4153,7 +4140,7 @@ retry:
 
 		allocated_fsb = imapp->br_blockcount;
 
-		if (reccount == 0) {
+		if (nimaps == 0) {
 			error = XFS_ERROR(ENOSPC);
 			break;
 		}
@@ -4176,9 +4163,11 @@ dmapi_enospc_check:
 
 	return error;
 
- error0:
+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
- error1:
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+
+error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	goto dmapi_enospc_check;
@@ -4423,8 +4412,8 @@ xfs_free_file_space(
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+				ip->i_udquot, ip->i_gdquot, resblks, 0,
+				XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 