aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/exec.c2
-rw-r--r--fs/inotify_user.c27
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/proc_misc.c12
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/ubifs/budget.c114
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/dir.c3
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/find.c19
-rw-r--r--fs/ubifs/gc.c20
-rw-r--r--fs/ubifs/misc.h49
-rw-r--r--fs/ubifs/super.c25
-rw-r--r--fs/ubifs/tnc.c116
-rw-r--r--fs/ubifs/ubifs-media.h2
-rw-r--r--fs/ubifs/ubifs.h14
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/ialloc.c44
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c20
-rw-r--r--fs/xfs/xfs_buf_item.c44
-rw-r--r--fs/xfs/xfs_dfrag.c9
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_log.c62
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c26
32 files changed, 426 insertions, 337 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c95295c65045..e83aa5ebe861 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
626 return NULL; 626 return NULL;
627 627
628error: 628error:
629 if (fid) 629 p9_client_clunk(fid);
630 p9_client_clunk(fid);
631 630
632 return ERR_PTR(result); 631 return ERR_PTR(result);
633} 632}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 87ee5ccee348..ed8feb052df9 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
125 inode->i_ino); 125 inode->i_ino);
126 if (err) { 126 if (err) {
127 inode_dec_link_count(inode); 127 inode_dec_link_count(inode);
128 iput(inode);
129 mutex_unlock(&info->bfs_lock); 128 mutex_unlock(&info->bfs_lock);
129 iput(inode);
130 return err; 130 return err;
131 } 131 }
132 mutex_unlock(&info->bfs_lock); 132 mutex_unlock(&info->bfs_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 80e93956aced..e7a1a99b7464 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1395 if (dentry->d_parent != parent) 1395 if (dentry->d_parent != parent)
1396 goto next; 1396 goto next;
1397 1397
1398 /* non-existing due to RCU? */
1399 if (d_unhashed(dentry))
1400 goto next;
1401
1398 /* 1402 /*
1399 * It is safe to compare names since d_move() cannot 1403 * It is safe to compare names since d_move() cannot
1400 * change the qstr (protected by d_lock). 1404 * change the qstr (protected by d_lock).
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1410 goto next; 1414 goto next;
1411 } 1415 }
1412 1416
1413 if (!d_unhashed(dentry)) { 1417 atomic_inc(&dentry->d_count);
1414 atomic_inc(&dentry->d_count); 1418 found = dentry;
1415 found = dentry;
1416 }
1417 spin_unlock(&dentry->d_lock); 1419 spin_unlock(&dentry->d_lock);
1418 break; 1420 break;
1419next: 1421next:
diff --git a/fs/exec.c b/fs/exec.c
index 32993beecbe9..cecee501ce78 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm)
752 tsk->active_mm = mm; 752 tsk->active_mm = mm;
753 activate_mm(active_mm, mm); 753 activate_mm(active_mm, mm);
754 task_unlock(tsk); 754 task_unlock(tsk);
755 mm_update_next_owner(old_mm);
756 arch_pick_mmap_layout(mm); 755 arch_pick_mmap_layout(mm);
757 if (old_mm) { 756 if (old_mm) {
758 up_read(&old_mm->mmap_sem); 757 up_read(&old_mm->mmap_sem);
759 BUG_ON(active_mm != old_mm); 758 BUG_ON(active_mm != old_mm);
759 mm_update_next_owner(old_mm);
760 mmput(old_mm); 760 mmput(old_mm);
761 return 0; 761 return 0;
762 } 762 }
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 60249429a253..d85c7d931cdf 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -323,7 +323,7 @@ out:
323} 323}
324 324
325/* 325/*
326 * remove_kevent - cleans up and ultimately frees the given kevent 326 * remove_kevent - cleans up the given kevent
327 * 327 *
328 * Caller must hold dev->ev_mutex. 328 * Caller must hold dev->ev_mutex.
329 */ 329 */
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev,
334 334
335 dev->event_count--; 335 dev->event_count--;
336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; 336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
337}
337 338
339/*
340 * free_kevent - frees the given kevent.
341 */
342static void free_kevent(struct inotify_kernel_event *kevent)
343{
338 kfree(kevent->name); 344 kfree(kevent->name);
339 kmem_cache_free(event_cachep, kevent); 345 kmem_cache_free(event_cachep, kevent);
340} 346}
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
350 struct inotify_kernel_event *kevent; 356 struct inotify_kernel_event *kevent;
351 kevent = inotify_dev_get_event(dev); 357 kevent = inotify_dev_get_event(dev);
352 remove_kevent(dev, kevent); 358 remove_kevent(dev, kevent);
359 free_kevent(kevent);
353 } 360 }
354} 361}
355 362
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
433 dev = file->private_data; 440 dev = file->private_data;
434 441
435 while (1) { 442 while (1) {
436 int events;
437 443
438 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); 444 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
439 445
440 mutex_lock(&dev->ev_mutex); 446 mutex_lock(&dev->ev_mutex);
441 events = !list_empty(&dev->events); 447 if (!list_empty(&dev->events)) {
442 mutex_unlock(&dev->ev_mutex);
443 if (events) {
444 ret = 0; 448 ret = 0;
445 break; 449 break;
446 } 450 }
451 mutex_unlock(&dev->ev_mutex);
447 452
448 if (file->f_flags & O_NONBLOCK) { 453 if (file->f_flags & O_NONBLOCK) {
449 ret = -EAGAIN; 454 ret = -EAGAIN;
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
462 if (ret) 467 if (ret)
463 return ret; 468 return ret;
464 469
465 mutex_lock(&dev->ev_mutex);
466 while (1) { 470 while (1) {
467 struct inotify_kernel_event *kevent; 471 struct inotify_kernel_event *kevent;
468 472
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
481 } 485 }
482 break; 486 break;
483 } 487 }
488 remove_kevent(dev, kevent);
489
490 /*
491 * Must perform the copy_to_user outside the mutex in order
492 * to avoid a lock order reversal with mmap_sem.
493 */
494 mutex_unlock(&dev->ev_mutex);
484 495
485 if (copy_to_user(buf, &kevent->event, event_size)) { 496 if (copy_to_user(buf, &kevent->event, event_size)) {
486 ret = -EFAULT; 497 ret = -EFAULT;
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
498 count -= kevent->event.len; 509 count -= kevent->event.len;
499 } 510 }
500 511
501 remove_kevent(dev, kevent); 512 free_kevent(kevent);
513
514 mutex_lock(&dev->ev_mutex);
502 } 515 }
503 mutex_unlock(&dev->ev_mutex); 516 mutex_unlock(&dev->ev_mutex);
504 517
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9abcd2b329f7..e9b20173fef3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw,
1279 } 1279 }
1280 } 1280 }
1281 1281
1282 if (errors > 0) {
1283 dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
1284 errors, (errors == 1 ? "" : "s"));
1285 if (!sloppy)
1286 return 0;
1287 }
1282 return 1; 1288 return 1;
1283 1289
1284out_nomem: 1290out_nomem:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 506c24fb5078..a53da1466277 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
594 goto bail; 594 goto bail;
595 } 595 }
596 596
597 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { 597 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
598 ocfs2_error(inode->i_sb, 598 ocfs2_error(inode->i_sb,
599 "Inode %llu has a hole at block %llu\n", 599 "Inode %llu has a hole at block %llu\n",
600 (unsigned long long)OCFS2_I(inode)->ip_blkno, 600 (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7d6b34e201db..ecc3330972e5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -499,9 +499,9 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
499 if (!size) 499 if (!size)
500 continue; 500 continue;
501 if (from + size > get_capacity(disk)) { 501 if (from + size > get_capacity(disk)) {
502 printk(KERN_ERR " %s: p%d exceeds device capacity\n", 502 printk(KERN_WARNING
503 "%s: p%d exceeds device capacity\n",
503 disk->disk_name, p); 504 disk->disk_name, p);
504 continue;
505 } 505 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags); 506 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) { 507 if (res) {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bca0f81eb687..7821589a17d5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -547,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
547 547
548 for (tmp = dir->subdir; tmp; tmp = tmp->next) 548 for (tmp = dir->subdir; tmp; tmp = tmp->next)
549 if (strcmp(tmp->name, dp->name) == 0) { 549 if (strcmp(tmp->name, dp->name) == 0) {
550 printk(KERN_WARNING "proc_dir_entry '%s' already " 550 printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
551 "registered\n", dp->name); 551 dir->name, dp->name);
552 dump_stack(); 552 dump_stack();
553 break; 553 break;
554 } 554 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 00f10a2dcf12..29e20c6b1f7f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -183,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
183 "SReclaimable: %8lu kB\n" 183 "SReclaimable: %8lu kB\n"
184 "SUnreclaim: %8lu kB\n" 184 "SUnreclaim: %8lu kB\n"
185 "PageTables: %8lu kB\n" 185 "PageTables: %8lu kB\n"
186#ifdef CONFIG_QUICKLIST
187 "Quicklists: %8lu kB\n"
188#endif
186 "NFS_Unstable: %8lu kB\n" 189 "NFS_Unstable: %8lu kB\n"
187 "Bounce: %8lu kB\n" 190 "Bounce: %8lu kB\n"
188 "WritebackTmp: %8lu kB\n" 191 "WritebackTmp: %8lu kB\n"
@@ -190,8 +193,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
190 "Committed_AS: %8lu kB\n" 193 "Committed_AS: %8lu kB\n"
191 "VmallocTotal: %8lu kB\n" 194 "VmallocTotal: %8lu kB\n"
192 "VmallocUsed: %8lu kB\n" 195 "VmallocUsed: %8lu kB\n"
193 "VmallocChunk: %8lu kB\n" 196 "VmallocChunk: %8lu kB\n",
194 "Quicklists: %8lu kB\n",
195 K(i.totalram), 197 K(i.totalram),
196 K(i.freeram), 198 K(i.freeram),
197 K(i.bufferram), 199 K(i.bufferram),
@@ -216,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
216 K(global_page_state(NR_SLAB_RECLAIMABLE)), 218 K(global_page_state(NR_SLAB_RECLAIMABLE)),
217 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 219 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
218 K(global_page_state(NR_PAGETABLE)), 220 K(global_page_state(NR_PAGETABLE)),
221#ifdef CONFIG_QUICKLIST
222 K(quicklist_total_size()),
223#endif
219 K(global_page_state(NR_UNSTABLE_NFS)), 224 K(global_page_state(NR_UNSTABLE_NFS)),
220 K(global_page_state(NR_BOUNCE)), 225 K(global_page_state(NR_BOUNCE)),
221 K(global_page_state(NR_WRITEBACK_TEMP)), 226 K(global_page_state(NR_WRITEBACK_TEMP)),
@@ -223,8 +228,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
223 K(committed), 228 K(committed),
224 (unsigned long)VMALLOC_TOTAL >> 10, 229 (unsigned long)VMALLOC_TOTAL >> 10,
225 vmi.used >> 10, 230 vmi.used >> 10,
226 vmi.largest_chunk >> 10, 231 vmi.largest_chunk >> 10
227 K(quicklist_total_size())
228 ); 232 );
229 233
230 len += hugetlb_report_meminfo(page + len); 234 len += hugetlb_report_meminfo(page + len);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 52312ec93ff4..5145cb9125af 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = {
58 * size 0 on the assumption that it's going to be used for an mmap of shared 58 * size 0 on the assumption that it's going to be used for an mmap of shared
59 * memory 59 * memory
60 */ 60 */
61static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 struct pagevec lru_pvec; 63 struct pagevec lru_pvec;
64 unsigned long npages, xpages, loop, limit; 64 unsigned long npages, xpages, loop, limit;
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 154098157473..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
302 int subtract_lebs; 302 int subtract_lebs;
303 long long available; 303 long long available;
304 304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used; 305 available = c->main_bytes - c->lst.total_used;
318 306
319 /* 307 /*
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
714} 702}
715 703
716/** 704/**
717 * ubifs_budg_get_free_space - return amount of free space. 705 * ubifs_reported_space - calculate reported free space.
706 * @c: the UBIFS file-system description object
707 * @free: amount of free space
708 *
709 * This function calculates amount of free space which will be reported to
710 * user-space. User-space application tend to expect that if the file-system
711 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
712 * are able to write a file of size N. UBIFS attaches node headers to each data
713 * node and it has to write indexind nodes as well. This introduces additional
714 * overhead, and UBIFS it has to report sligtly less free space to meet the
715 * above expectetion.
716 *
717 * This function assumes free space is made up of uncompressed data nodes and
718 * full index nodes (one per data node, tripled because we always allow enough
719 * space to write the index thrice).
720 *
721 * Note, the calculation is pessimistic, which means that most of the time
722 * UBIFS reports less space than it actually has.
723 */
724long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
725{
726 int divisor, factor, f;
727
728 /*
729 * Reported space size is @free * X, where X is UBIFS block size
730 * divided by UBIFS block size + all overhead one data block
731 * introduces. The overhead is the node header + indexing overhead.
732 *
733 * Indexing overhead calculations are based on the following formula:
734 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
735 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
736 * as less than maximum fanout, we assume that each data node
737 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
738 * Note, the multiplier 3 is because UBIFS reseves thrice as more space
739 * for the index.
740 */
741 f = c->fanout > 3 ? c->fanout >> 1 : 2;
742 factor = UBIFS_BLOCK_SIZE;
743 divisor = UBIFS_MAX_DATA_NODE_SZ;
744 divisor += (c->max_idx_node_sz * 3) / (f - 1);
745 free *= factor;
746 do_div(free, divisor);
747 return free;
748}
749
750/**
751 * ubifs_get_free_space - return amount of free space.
718 * @c: UBIFS file-system description object 752 * @c: UBIFS file-system description object
719 * 753 *
720 * This function returns amount of free space on the file-system. 754 * This function calculates amount of free space to report to user-space.
755 *
756 * Because UBIFS may introduce substantial overhead (the index, node headers,
757 * alighment, wastage at the end of eraseblocks, etc), it cannot report real
758 * amount of free flash space it has (well, because not all dirty space is
759 * reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
760 * it would bread user expectetion about what free space is. Users seem to
761 * accustomed to assume that if the file-system reports N bytes of free space,
762 * they would be able to fit a file of N bytes to the FS. This almost works for
763 * traditional file-systems, because they have way less overhead than UBIFS.
764 * So, to keep users happy, UBIFS tries to take the overhead into account.
721 */ 765 */
722long long ubifs_budg_get_free_space(struct ubifs_info *c) 766long long ubifs_get_free_space(struct ubifs_info *c)
723{ 767{
724 int min_idx_lebs, rsvd_idx_lebs; 768 int min_idx_lebs, rsvd_idx_lebs, lebs;
725 long long available, outstanding, free; 769 long long available, outstanding, free;
726 770
727 /* Do exactly the same calculations as in 'do_budget_space()' */
728 spin_lock(&c->space_lock); 771 spin_lock(&c->space_lock);
729 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 772 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
773 outstanding = c->budg_data_growth + c->budg_dd_growth;
730 774
731 if (min_idx_lebs > c->lst.idx_lebs) 775 /*
732 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 776 * Force the amount available to the total size reported if the used
733 else 777 * space is zero.
734 rsvd_idx_lebs = 0; 778 */
735 779 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
736 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
737 - c->lst.taken_empty_lebs) {
738 spin_unlock(&c->space_lock); 780 spin_unlock(&c->space_lock);
739 return 0; 781 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
740 } 782 }
741 783
742 available = ubifs_calc_available(c, min_idx_lebs); 784 available = ubifs_calc_available(c, min_idx_lebs);
743 outstanding = c->budg_data_growth + c->budg_dd_growth; 785
744 c->min_idx_lebs = min_idx_lebs; 786 /*
787 * When reporting free space to user-space, UBIFS guarantees that it is
788 * possible to write a file of free space size. This means that for
789 * empty LEBs we may use more precise calculations than
790 * 'ubifs_calc_available()' is using. Namely, we know that in empty
791 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
792 * Thus, amend the available space.
793 *
794 * Note, the calculations below are similar to what we have in
795 * 'do_budget_space()', so refer there for comments.
796 */
797 if (min_idx_lebs > c->lst.idx_lebs)
798 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
799 else
800 rsvd_idx_lebs = 0;
801 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
802 c->lst.taken_empty_lebs;
803 lebs -= rsvd_idx_lebs;
804 available += lebs * (c->dark_wm - c->leb_overhead);
745 spin_unlock(&c->space_lock); 805 spin_unlock(&c->space_lock);
746 806
747 if (available > outstanding) 807 if (available > outstanding)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b9cb77473758..d7f7645779f2 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); 538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
539 for (i = 0; i < n; i++) 539 for (i = 0; i < n; i++)
540 printk(KERN_DEBUG "\t ino %llu\n", 540 printk(KERN_DEBUG "\t ino %llu\n",
541 le64_to_cpu(orph->inos[i])); 541 (unsigned long long)le64_to_cpu(orph->inos[i]));
542 break; 542 break;
543 } 543 }
544 default: 544 default:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 5c96f1fb7016..526c01ec8003 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
426 426
427 while (1) { 427 while (1) {
428 dbg_gen("feed '%s', ino %llu, new f_pos %#x", 428 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
429 dent->name, le64_to_cpu(dent->inum), 429 dent->name, (unsigned long long)le64_to_cpu(dent->inum),
430 key_hash_flash(c, &dent->key)); 430 key_hash_flash(c, &dent->key));
431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); 431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
432 432
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
587 if (err) { 587 if (err) {
588 if (err != -ENOSPC) 588 if (err != -ENOSPC)
589 return err; 589 return err;
590 err = 0;
591 budgeted = 0; 590 budgeted = 0;
592 } 591 }
593 592
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4071d1cae29f..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
793 int err; 793 int err;
794 struct ubifs_budget_req req; 794 struct ubifs_budget_req req;
795 loff_t old_size = inode->i_size, new_size = attr->ia_size; 795 loff_t old_size = inode->i_size, new_size = attr->ia_size;
796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1); 796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
797 struct ubifs_inode *ui = ubifs_inode(inode); 797 struct ubifs_inode *ui = ubifs_inode(inode);
798 798
799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); 799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
811 /* A funny way to budget for truncation node */ 811 /* A funny way to budget for truncation node */
812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; 812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
813 err = ubifs_budget_space(c, &req); 813 err = ubifs_budget_space(c, &req);
814 if (err) 814 if (err) {
815 return err; 815 /*
816 * Treat truncations to zero as deletion and always allow them,
817 * just like we do for '->unlink()'.
818 */
819 if (new_size || err != -ENOSPC)
820 return err;
821 budgeted = 0;
822 }
816 823
817 err = vmtruncate(inode, new_size); 824 err = vmtruncate(inode, new_size);
818 if (err) 825 if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
869 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 876 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
870 mutex_unlock(&ui->ui_mutex); 877 mutex_unlock(&ui->ui_mutex);
871out_budg: 878out_budg:
872 ubifs_release_budget(c, &req); 879 if (budgeted)
880 ubifs_release_budget(c, &req);
881 else {
882 c->nospace = c->nospace_rp = 0;
883 smp_wmb();
884 }
873 return err; 885 return err;
874} 886}
875 887
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index adee7b5ddeab..47814cde2407 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty 211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria. 212 * or do not have an LEB which satisfies the @min_space criteria.
213 * 213 *
214 * Note: 214 * Note, LEBs which have less than dead watermark of free + dirty space are
215 * o LEBs which have less than dead watermark of dirty space are never picked 215 * never picked by this function.
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 * 216 *
223 * The additional @pick_free argument controls if this function has to return a 217 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must to set it to %1, 218 * free or freeable LEB if one is present. For example, GC must to set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
231 * 225 *
232 * In addition @pick_free is set to %2 by the recovery process in order to 226 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned. 227 * recover gc_lnum in which case an index LEB must not be returned.
228 *
229 * This function returns zero and the LEB properties of found dirty LEB in case
230 * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
231 * case of other failures. The returned LEB is marked as "taken".
234 */ 232 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, 233int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free) 234 int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
245 int lebs, rsvd_idx_lebs = 0; 243 int lebs, rsvd_idx_lebs = 0;
246 244
247 spin_lock(&c->space_lock); 245 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs; 246 lebs = c->lst.empty_lebs + c->idx_gc_cnt;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs; 247 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250 248
251 /* 249 /*
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
317 lp = idx_lp; 315 lp = idx_lp;
318 316
319 if (lp) { 317 if (lp) {
320 ubifs_assert(lp->dirty >= c->dead_wm); 318 ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
321 goto found; 319 goto found;
322 } 320 }
323 321
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
509 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
510 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
511 c->lst.taken_empty_lebs; 509 c->lst.taken_empty_lebs;
512 ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
513 if (rsvd_idx_lebs < lebs) 510 if (rsvd_idx_lebs < lebs)
514 /* 511 /*
515 * OK to allocate an empty LEB, but we still don't want to go 512 * OK to allocate an empty LEB, but we still don't want to go
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..02aba36fe3d4 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
334 334
335 err = move_nodes(c, sleb); 335 err = move_nodes(c, sleb);
336 if (err) 336 if (err)
337 goto out; 337 goto out_inc_seq;
338 338
339 err = gc_sync_wbufs(c); 339 err = gc_sync_wbufs(c);
340 if (err) 340 if (err)
341 goto out; 341 goto out_inc_seq;
342 342
343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); 343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
344 if (err) 344 if (err)
345 goto out; 345 goto out_inc_seq;
346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
346 352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
363out: 369out:
364 ubifs_scan_destroy(sleb); 370 ubifs_scan_destroy(sleb);
365 return err; 371 return err;
372
373out_inc_seq:
374 /* We may have moved at least some nodes so allow for races with TNC */
375 c->gced_lnum = lnum;
376 smp_wmb();
377 c->gc_seq += 1;
378 smp_wmb();
379 goto out;
366} 380}
367 381
368/** 382/**
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
284} 284}
285 285
286/** 286/**
287 * ubifs_reported_space - calculate reported free space.
288 * @c: the UBIFS file-system description object
289 * @free: amount of free space
290 *
291 * This function calculates amount of free space which will be reported to
292 * user-space. User-space application tend to expect that if the file-system
293 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
294 * are able to write a file of size N. UBIFS attaches node headers to each data
295 * node and it has to write indexind nodes as well. This introduces additional
296 * overhead, and UBIFS it has to report sligtly less free space to meet the
297 * above expectetion.
298 *
299 * This function assumes free space is made up of uncompressed data nodes and
300 * full index nodes (one per data node, doubled because we always allow enough
301 * space to write the index twice).
302 *
303 * Note, the calculation is pessimistic, which means that most of the time
304 * UBIFS reports less space than it actually has.
305 */
306static inline long long ubifs_reported_space(const struct ubifs_info *c,
307 uint64_t free)
308{
309 int divisor, factor;
310
311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
313 do_div(free, divisor);
314
315 return free * factor;
316}
317
318/**
319 * ubifs_current_time - round current time to time granularity. 287 * ubifs_current_time - round current time to time granularity.
320 * @inode: inode 288 * @inode: inode
321 */ 289 */
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 293 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 294}
327 295
296/**
297 * ubifs_tnc_lookup - look up a file-system node.
298 * @c: UBIFS file-system description object
299 * @key: node key to lookup
300 * @node: the node is returned here
301 *
302 * This function look up and reads node with key @key. The caller has to make
303 * sure the @node buffer is large enough to fit the node. Returns zero in case
304 * of success, %-ENOENT if the node was not found, and a negative error code in
305 * case of failure.
306 */
307static inline int ubifs_tnc_lookup(struct ubifs_info *c,
308 const union ubifs_key *key, void *node)
309{
310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311}
312
328#endif /* __UBIFS_MISC_H__ */ 313#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f71e6b8822c4..3f4902060c7a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
370{ 370{
371 struct ubifs_info *c = dentry->d_sb->s_fs_info; 371 struct ubifs_info *c = dentry->d_sb->s_fs_info;
372 unsigned long long free; 372 unsigned long long free;
373 __le32 *uuid = (__le32 *)c->uuid;
373 374
374 free = ubifs_budg_get_free_space(c); 375 free = ubifs_get_free_space(c);
375 dbg_gen("free space %lld bytes (%lld blocks)", 376 dbg_gen("free space %lld bytes (%lld blocks)",
376 free, free >> UBIFS_BLOCK_SHIFT); 377 free, free >> UBIFS_BLOCK_SHIFT);
377 378
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
386 buf->f_files = 0; 387 buf->f_files = 0;
387 buf->f_ffree = 0; 388 buf->f_ffree = 0;
388 buf->f_namelen = UBIFS_MAX_NLEN; 389 buf->f_namelen = UBIFS_MAX_NLEN;
389 390 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
391 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
390 return 0; 392 return 0;
391} 393}
392 394
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
530 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 532 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
531 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 533 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
532 534
535 /*
536 * Calculate how many bytes would be wasted at the end of LEB if it was
537 * fully filled with data nodes of maximum size. This is used in
538 * calculations when reporting free space.
539 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
533 return 0; 541 return 0;
534} 542}
535 543
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
647 * internally because it does not make much sense for UBIFS, but it is 655 * internally because it does not make much sense for UBIFS, but it is
648 * necessary to report something for the 'statfs()' call. 656 * necessary to report something for the 'statfs()' call.
649 * 657 *
650 * Subtract the LEB reserved for GC and the LEB which is reserved for 658 * Subtract the LEB reserved for GC, the LEB which is reserved for
651 * deletions. 659 * deletions, and assume only one journal head is available.
652 *
653 * Review 'ubifs_calc_available()' if changing this calculation.
654 */ 660 */
655 tmp64 = c->main_lebs - 2; 661 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
656 tmp64 *= (uint64_t)c->leb_size - c->dark_wm; 662 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
657 tmp64 = ubifs_reported_space(c, tmp64); 663 tmp64 = ubifs_reported_space(c, tmp64);
658 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 664 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
659 665
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c)
1018 goto out_dereg; 1024 goto out_dereg;
1019 } 1025 }
1020 1026
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1021 if (!mounted_read_only) { 1028 if (!mounted_read_only) {
1022 err = alloc_wbufs(c); 1029 err = alloc_wbufs(c);
1023 if (err) 1030 if (err)
1024 goto out_cbuf; 1031 goto out_cbuf;
1025 1032
1026 /* Create background thread */ 1033 /* Create background thread */
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
1028 c->vi.vol_id);
1029 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1034 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1030 if (!c->bgt) 1035 if (!c->bgt)
1031 c->bgt = ERR_PTR(-EINVAL); 1036 c->bgt = ERR_PTR(-EINVAL);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7634c5970887 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401 1397
1402 mutex_lock(&c->tnc_mutex); 1398 gced_lnum = c->gced_lnum;
1403 found = ubifs_lookup_level0(c, key, &znode, &n); 1399 smp_rmb();
1404 if (!found) { 1400 gc_seq2 = c->gc_seq;
1405 err = -ENOENT; 1401 /* Same seq means no GC */
1406 goto out; 1402 if (gc_seq1 == gc_seq2)
1407 } else if (found < 0) { 1403 return 0;
1408 err = found; 1404 /* Different by more than 1 means we don't know */
1409 goto out; 1405 if (gc_seq1 + 1 != gc_seq2)
1410 } 1406 return 1;
1411 zt = &znode->zbranch[n]; 1407 /*
1412 if (is_hash_key(c, key)) { 1408 * We have seen the sequence number has increased by 1. Now we need to
1413 /* 1409 * be sure we read the right LEB number, so read it again.
1414 * In this case the leaf node cache gets used, so we pass the 1410 */
1415 * address of the zbranch and keep the mutex locked 1411 smp_rmb();
1416 */ 1412 if (gced_lnum != c->gced_lnum)
1417 err = tnc_read_node_nm(c, zt, node); 1413 return 1;
1418 goto out; 1414 /* Finally we can check lnum */
1419 } 1415 if (gced_lnum == lnum)
1420 zbr = znode->zbranch[n]; 1416 return 1;
1421 mutex_unlock(&c->tnc_mutex); 1417 return 0;
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function look up and reads node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1498{ 1509{
1499 int found, n, err; 1510 int found, n, err;
1500 struct ubifs_znode *znode; 1511 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502 1512
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1513 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex); 1514 mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1522 goto out_unlock; 1532 goto out_unlock;
1523 } 1533 }
1524 1534
1525 zbr = znode->zbranch[n]; 1535 err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530 1536
1531out_unlock: 1537out_unlock:
1532 mutex_unlock(&c->tnc_mutex); 1538 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index bd2121f3426e..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
87#define UBIFS_SK_LEN 8 87#define UBIFS_SK_LEN 8
88 88
89/* Minimum index tree fanout */ 89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2 90#define UBIFS_MIN_FANOUT 3
91 91
92/* Maximum number of levels in UBIFS indexing B-tree */ 92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512 93#define UBIFS_MAX_LEVELS 512
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
996 * @max_inode_sz: maximum possible inode size in bytes 996 * @max_inode_sz: maximum possible inode size in bytes
997 * @max_znode_sz: size of znode in bytes 997 * @max_znode_sz: size of znode in bytes
998 *
999 * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
1000 * data nodes of maximum size - used in free space reporting
998 * @dead_wm: LEB dead space watermark 1001 * @dead_wm: LEB dead space watermark
999 * @dark_wm: LEB dark space watermark 1002 * @dark_wm: LEB dark space watermark
1000 * @block_cnt: count of 4KiB blocks on the FS 1003 * @block_cnt: count of 4KiB blocks on the FS
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1031 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1032 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1033 * @idx_gc_cnt: number of elements on the idx_gc list
1034 * @gc_seq: incremented for every non-index LEB garbage collected
1035 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1036 *
1032 * @infos_list: links all 'ubifs_info' objects 1037 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1038 * @umount_mutex: serializes shrinker and un-mount
@@ -1224,6 +1229,8 @@ struct ubifs_info {
1224 int max_idx_node_sz; 1229 int max_idx_node_sz;
1225 long long max_inode_sz; 1230 long long max_inode_sz;
1226 int max_znode_sz; 1231 int max_znode_sz;
1232
1233 int leb_overhead;
1227 int dead_wm; 1234 int dead_wm;
1228 int dark_wm; 1235 int dark_wm;
1229 int block_cnt; 1236 int block_cnt;
@@ -1257,6 +1264,8 @@ struct ubifs_info {
1257 void *sbuf; 1264 void *sbuf;
1258 struct list_head idx_gc; 1265 struct list_head idx_gc;
1259 int idx_gc_cnt; 1266 int idx_gc_cnt;
1267 volatile int gc_seq;
1268 volatile int gced_lnum;
1260 1269
1261 struct list_head infos_list; 1270 struct list_head infos_list;
1262 struct mutex umount_mutex; 1271 struct mutex umount_mutex;
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1434 struct ubifs_budget_req *req); 1443 struct ubifs_budget_req *req);
1435void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1444void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1436 struct ubifs_budget_req *req); 1445 struct ubifs_budget_req *req);
1437long long ubifs_budg_get_free_space(struct ubifs_info *c); 1446long long ubifs_get_free_space(struct ubifs_info *c);
1438int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1447int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1439void ubifs_convert_page_budget(struct ubifs_info *c); 1448void ubifs_convert_page_budget(struct ubifs_info *c);
1449long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
1440long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1450long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1441 1451
1442/* find.c */ 1452/* find.c */
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1461/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1462int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1463 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1464int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1465 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1466int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 0ed6e146a0d9..eb91f3b70320 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = {
211 .release = udf_release_file, 211 .release = udf_release_file,
212 .fsync = udf_fsync_file, 212 .fsync = udf_fsync_file,
213 .splice_read = generic_file_splice_read, 213 .splice_read = generic_file_splice_read,
214 .llseek = generic_file_llseek,
214}; 215};
215 216
216const struct inode_operations udf_file_inode_operations = { 217const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index eb9cfa23dc3d..a4f2b3ce45b0 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
76 *err = -ENOSPC; 76 *err = -ENOSPC;
77 77
78 iinfo = UDF_I(inode); 78 iinfo = UDF_I(inode);
79 iinfo->i_unique = 0; 79 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
80 iinfo->i_lenExtents = 0; 80 iinfo->i_efe = 1;
81 iinfo->i_next_alloc_block = 0; 81 if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
82 iinfo->i_next_alloc_goal = 0; 82 sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
83 iinfo->i_strat4096 = 0; 83 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
84 sizeof(struct extendedFileEntry),
85 GFP_KERNEL);
86 } else {
87 iinfo->i_efe = 0;
88 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
89 sizeof(struct fileEntry),
90 GFP_KERNEL);
91 }
92 if (!iinfo->i_ext.i_data) {
93 iput(inode);
94 *err = -ENOMEM;
95 return NULL;
96 }
84 97
85 block = udf_new_block(dir->i_sb, NULL, 98 block = udf_new_block(dir->i_sb, NULL,
86 dinfo->i_location.partitionReferenceNum, 99 dinfo->i_location.partitionReferenceNum,
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
111 lvhd->uniqueID = cpu_to_le64(uniqueID); 124 lvhd->uniqueID = cpu_to_le64(uniqueID);
112 mark_buffer_dirty(sbi->s_lvid_bh); 125 mark_buffer_dirty(sbi->s_lvid_bh);
113 } 126 }
127 mutex_unlock(&sbi->s_alloc_mutex);
114 inode->i_mode = mode; 128 inode->i_mode = mode;
115 inode->i_uid = current->fsuid; 129 inode->i_uid = current->fsuid;
116 if (dir->i_mode & S_ISGID) { 130 if (dir->i_mode & S_ISGID) {
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
129 iinfo->i_lenEAttr = 0; 143 iinfo->i_lenEAttr = 0;
130 iinfo->i_lenAlloc = 0; 144 iinfo->i_lenAlloc = 0;
131 iinfo->i_use = 0; 145 iinfo->i_use = 0;
132 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
133 iinfo->i_efe = 1;
134 if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
135 sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
136 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
137 sizeof(struct extendedFileEntry),
138 GFP_KERNEL);
139 } else {
140 iinfo->i_efe = 0;
141 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
142 sizeof(struct fileEntry),
143 GFP_KERNEL);
144 }
145 if (!iinfo->i_ext.i_data) {
146 iput(inode);
147 *err = -ENOMEM;
148 mutex_unlock(&sbi->s_alloc_mutex);
149 return NULL;
150 }
151 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) 146 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
152 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; 147 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
153 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 148 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
158 iinfo->i_crtime = current_fs_time(inode->i_sb); 153 iinfo->i_crtime = current_fs_time(inode->i_sb);
159 insert_inode_hash(inode); 154 insert_inode_hash(inode);
160 mark_inode_dirty(inode); 155 mark_inode_dirty(inode);
161 mutex_unlock(&sbi->s_alloc_mutex);
162 156
163 if (DQUOT_ALLOC_INODE(inode)) { 157 if (DQUOT_ALLOC_INODE(inode)) {
164 DQUOT_DROP(inode); 158 DQUOT_DROP(inode);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index f42f80a3b1fa..a44d68eb50b5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1338,6 +1338,10 @@ __xfs_get_blocks(
1338 offset = (xfs_off_t)iblock << inode->i_blkbits; 1338 offset = (xfs_off_t)iblock << inode->i_blkbits;
1339 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1339 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1340 size = bh_result->b_size; 1340 size = bh_result->b_size;
1341
1342 if (!create && direct && offset >= i_size_read(inode))
1343 return 0;
1344
1341 error = xfs_iomap(XFS_I(inode), offset, size, 1345 error = xfs_iomap(XFS_I(inode), offset, size,
1342 create ? flags : BMAPI_READ, &iomap, &niomap); 1346 create ? flags : BMAPI_READ, &iomap, &niomap);
1343 if (error) 1347 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 73c65f19e549..18d3c8487835 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1302,9 +1302,29 @@ xfs_fs_remount(
1302 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1302 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1303 break; 1303 break;
1304 default: 1304 default:
1305 /*
1306 * Logically we would return an error here to prevent
1307 * users from believing they might have changed
1308 * mount options using remount which can't be changed.
1309 *
1310 * But unfortunately mount(8) adds all options from
1311 * mtab and fstab to the mount arguments in some cases
1312 * so we can't blindly reject options, but have to
1313 * check for each specified option if it actually
1314 * differs from the currently set option and only
1315 * reject it if that's the case.
1316 *
1317 * Until that is implemented we return success for
1318 * every remount request, and silently ignore all
1319 * options that we can't actually change.
1320 */
1321#if 0
1305 printk(KERN_INFO 1322 printk(KERN_INFO
1306 "XFS: mount option \"%s\" not supported for remount\n", p); 1323 "XFS: mount option \"%s\" not supported for remount\n", p);
1307 return -EINVAL; 1324 return -EINVAL;
1325#else
1326 return 0;
1327#endif
1308 } 1328 }
1309 } 1329 }
1310 1330
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 608c30c3f76b..002fc2617c8e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -732,6 +732,7 @@ xfs_buf_item_init(
732 bip->bli_item.li_ops = &xfs_buf_item_ops; 732 bip->bli_item.li_ops = &xfs_buf_item_ops;
733 bip->bli_item.li_mountp = mp; 733 bip->bli_item.li_mountp = mp;
734 bip->bli_buf = bp; 734 bip->bli_buf = bp;
735 xfs_buf_hold(bp);
735 bip->bli_format.blf_type = XFS_LI_BUF; 736 bip->bli_format.blf_type = XFS_LI_BUF;
736 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 737 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 738 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
@@ -867,6 +868,21 @@ xfs_buf_item_dirty(
867 return (bip->bli_flags & XFS_BLI_DIRTY); 868 return (bip->bli_flags & XFS_BLI_DIRTY);
868} 869}
869 870
871STATIC void
872xfs_buf_item_free(
873 xfs_buf_log_item_t *bip)
874{
875#ifdef XFS_TRANS_DEBUG
876 kmem_free(bip->bli_orig);
877 kmem_free(bip->bli_logged);
878#endif /* XFS_TRANS_DEBUG */
879
880#ifdef XFS_BLI_TRACE
881 ktrace_free(bip->bli_trace);
882#endif
883 kmem_zone_free(xfs_buf_item_zone, bip);
884}
885
870/* 886/*
871 * This is called when the buf log item is no longer needed. It should 887 * This is called when the buf log item is no longer needed. It should
872 * free the buf log item associated with the given buffer and clear 888 * free the buf log item associated with the given buffer and clear
@@ -887,18 +903,8 @@ xfs_buf_item_relse(
887 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 903 (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
888 XFS_BUF_CLR_IODONE_FUNC(bp); 904 XFS_BUF_CLR_IODONE_FUNC(bp);
889 } 905 }
890 906 xfs_buf_rele(bp);
891#ifdef XFS_TRANS_DEBUG 907 xfs_buf_item_free(bip);
892 kmem_free(bip->bli_orig);
893 bip->bli_orig = NULL;
894 kmem_free(bip->bli_logged);
895 bip->bli_logged = NULL;
896#endif /* XFS_TRANS_DEBUG */
897
898#ifdef XFS_BLI_TRACE
899 ktrace_free(bip->bli_trace);
900#endif
901 kmem_zone_free(xfs_buf_item_zone, bip);
902} 908}
903 909
904 910
@@ -1120,6 +1126,7 @@ xfs_buf_iodone(
1120 1126
1121 ASSERT(bip->bli_buf == bp); 1127 ASSERT(bip->bli_buf == bp);
1122 1128
1129 xfs_buf_rele(bp);
1123 mp = bip->bli_item.li_mountp; 1130 mp = bip->bli_item.li_mountp;
1124 1131
1125 /* 1132 /*
@@ -1136,18 +1143,7 @@ xfs_buf_iodone(
1136 * xfs_trans_delete_ail() drops the AIL lock. 1143 * xfs_trans_delete_ail() drops the AIL lock.
1137 */ 1144 */
1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1145 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1139 1146 xfs_buf_item_free(bip);
1140#ifdef XFS_TRANS_DEBUG
1141 kmem_free(bip->bli_orig);
1142 bip->bli_orig = NULL;
1143 kmem_free(bip->bli_logged);
1144 bip->bli_logged = NULL;
1145#endif /* XFS_TRANS_DEBUG */
1146
1147#ifdef XFS_BLI_TRACE
1148 ktrace_free(bip->bli_trace);
1149#endif
1150 kmem_zone_free(xfs_buf_item_zone, bip);
1151} 1147}
1152 1148
1153#if defined(XFS_BLI_TRACE) 1149#if defined(XFS_BLI_TRACE)
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 760f4c5b5160..75b0cd4da0ea 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -149,7 +149,14 @@ xfs_swap_extents(
149 149
150 sbp = &sxp->sx_stat; 150 sbp = &sxp->sx_stat;
151 151
152 xfs_lock_two_inodes(ip, tip, lock_flags); 152 /*
153 * we have to do two separate lock calls here to keep lockdep
154 * happy. If we try to get all the locks in one call, lock will
155 * report false positives when we drop the ILOCK and regain them
156 * below.
157 */
158 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
159 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
153 locked = 1; 160 locked = 1;
154 161
155 /* Verify that both files have the same format */ 162 /* Verify that both files have the same format */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 00e80df9dd9d..dbd9cef852ec 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct(
4118 ASSERT(nextents <= XFS_LINEAR_EXTS); 4118 ASSERT(nextents <= XFS_LINEAR_EXTS);
4119 size = nextents * sizeof(xfs_bmbt_rec_t); 4119 size = nextents * sizeof(xfs_bmbt_rec_t);
4120 4120
4121 xfs_iext_irec_compact_full(ifp); 4121 xfs_iext_irec_compact_pages(ifp);
4122 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4122 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4123 4123
4124 ep = ifp->if_u1.if_ext_irec->er_extbuf; 4124 ep = ifp->if_u1.if_ext_irec->er_extbuf;
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove(
4449 * compaction policy is as follows: 4449 * compaction policy is as follows:
4450 * 4450 *
4451 * Full Compaction: Extents fit into a single page (or inline buffer) 4451 * Full Compaction: Extents fit into a single page (or inline buffer)
4452 * Full Compaction: Extents occupy less than 10% of allocated space 4452 * Partial Compaction: Extents occupy less than 50% of allocated space
4453 * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
4454 * No Compaction: Extents occupy at least 50% of allocated space 4453 * No Compaction: Extents occupy at least 50% of allocated space
4455 */ 4454 */
4456void 4455void
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact(
4471 xfs_iext_direct_to_inline(ifp, nextents); 4470 xfs_iext_direct_to_inline(ifp, nextents);
4472 } else if (nextents <= XFS_LINEAR_EXTS) { 4471 } else if (nextents <= XFS_LINEAR_EXTS) {
4473 xfs_iext_indirect_to_direct(ifp); 4472 xfs_iext_indirect_to_direct(ifp);
4474 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
4475 xfs_iext_irec_compact_full(ifp);
4476 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 4473 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
4477 xfs_iext_irec_compact_pages(ifp); 4474 xfs_iext_irec_compact_pages(ifp);
4478 } 4475 }
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages(
4496 erp_next = erp + 1; 4493 erp_next = erp + 1;
4497 if (erp_next->er_extcount <= 4494 if (erp_next->er_extcount <=
4498 (XFS_LINEAR_EXTS - erp->er_extcount)) { 4495 (XFS_LINEAR_EXTS - erp->er_extcount)) {
4499 memmove(&erp->er_extbuf[erp->er_extcount], 4496 memcpy(&erp->er_extbuf[erp->er_extcount],
4500 erp_next->er_extbuf, erp_next->er_extcount * 4497 erp_next->er_extbuf, erp_next->er_extcount *
4501 sizeof(xfs_bmbt_rec_t)); 4498 sizeof(xfs_bmbt_rec_t));
4502 erp->er_extcount += erp_next->er_extcount; 4499 erp->er_extcount += erp_next->er_extcount;
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages(
4516} 4513}
4517 4514
4518/* 4515/*
4519 * Fully compact the extent records managed by the indirection array.
4520 */
4521void
4522xfs_iext_irec_compact_full(
4523 xfs_ifork_t *ifp) /* inode fork pointer */
4524{
4525 xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */
4526 xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
4527 int erp_idx = 0; /* extent irec index */
4528 int ext_avail; /* empty entries in ex list */
4529 int ext_diff; /* number of exts to add */
4530 int nlists; /* number of irec's (ex lists) */
4531
4532 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4533
4534 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4535 erp = ifp->if_u1.if_ext_irec;
4536 ep = &erp->er_extbuf[erp->er_extcount];
4537 erp_next = erp + 1;
4538 ep_next = erp_next->er_extbuf;
4539
4540 while (erp_idx < nlists - 1) {
4541 /*
4542 * Check how many extent records are available in this irec.
4543 * If there is none skip the whole exercise.
4544 */
4545 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4546 if (ext_avail) {
4547
4548 /*
4549 * Copy over as many as possible extent records into
4550 * the previous page.
4551 */
4552 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4553 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4554 erp->er_extcount += ext_diff;
4555 erp_next->er_extcount -= ext_diff;
4556
4557 /*
4558 * If the next irec is empty now we can simply
4559 * remove it.
4560 */
4561 if (erp_next->er_extcount == 0) {
4562 /*
4563 * Free page before removing extent record
4564 * so er_extoffs don't get modified in
4565 * xfs_iext_irec_remove.
4566 */
4567 kmem_free(erp_next->er_extbuf);
4568 erp_next->er_extbuf = NULL;
4569 xfs_iext_irec_remove(ifp, erp_idx + 1);
4570 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4571 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4572
4573 /*
4574 * If the next irec is not empty move up the content
4575 * that has not been copied to the previous page to
4576 * the beggining of this one.
4577 */
4578 } else {
4579 memmove(erp_next->er_extbuf, &ep_next[ext_diff],
4580 erp_next->er_extcount *
4581 sizeof(xfs_bmbt_rec_t));
4582 ep_next = erp_next->er_extbuf;
4583 memset(&ep_next[erp_next->er_extcount], 0,
4584 (XFS_LINEAR_EXTS -
4585 erp_next->er_extcount) *
4586 sizeof(xfs_bmbt_rec_t));
4587 }
4588 }
4589
4590 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4591 erp_idx++;
4592 if (erp_idx < nlists)
4593 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4594 else
4595 break;
4596 }
4597 ep = &erp->er_extbuf[erp->er_extcount];
4598 erp_next = erp + 1;
4599 ep_next = erp_next->er_extbuf;
4600 }
4601}
4602
4603/*
4604 * This is called to update the er_extoff field in the indirection 4516 * This is called to update the er_extoff field in the indirection
4605 * array when extents have been added or removed from one of the 4517 * array when extents have been added or removed from one of the
4606 * extent lists. erp_idx contains the irec index to begin updating 4518 * extent lists. erp_idx contains the irec index to begin updating
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ccba14eb9dbe..503ea89e8b9a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
124STATIC int xlog_iclogs_empty(xlog_t *log); 124STATIC int xlog_iclogs_empty(xlog_t *log);
125 125
126#if defined(XFS_LOG_TRACE) 126#if defined(XFS_LOG_TRACE)
127
128#define XLOG_TRACE_LOGGRANT_SIZE 2048
129#define XLOG_TRACE_ICLOG_SIZE 256
130
131void
132xlog_trace_loggrant_alloc(xlog_t *log)
133{
134 log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
135}
136
137void
138xlog_trace_loggrant_dealloc(xlog_t *log)
139{
140 ktrace_free(log->l_grant_trace);
141}
142
127void 143void
128xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) 144xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
129{ 145{
130 unsigned long cnts; 146 unsigned long cnts;
131 147
132 if (!log->l_grant_trace) {
133 log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
134 if (!log->l_grant_trace)
135 return;
136 }
137 /* ticket counts are 1 byte each */ 148 /* ticket counts are 1 byte each */
138 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; 149 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
139 150
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
157} 168}
158 169
159void 170void
171xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
172{
173 iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
174}
175
176void
177xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
178{
179 ktrace_free(iclog->ic_trace);
180}
181
182void
160xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 183xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
161{ 184{
162 if (!iclog->ic_trace)
163 iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
164 ktrace_enter(iclog->ic_trace, 185 ktrace_enter(iclog->ic_trace,
165 (void *)((unsigned long)state), 186 (void *)((unsigned long)state),
166 (void *)((unsigned long)current_pid()), 187 (void *)((unsigned long)current_pid()),
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
170 (void *)NULL, (void *)NULL); 191 (void *)NULL, (void *)NULL);
171} 192}
172#else 193#else
194
195#define xlog_trace_loggrant_alloc(log)
196#define xlog_trace_loggrant_dealloc(log)
173#define xlog_trace_loggrant(log,tic,string) 197#define xlog_trace_loggrant(log,tic,string)
198
199#define xlog_trace_iclog_alloc(iclog)
200#define xlog_trace_iclog_dealloc(iclog)
174#define xlog_trace_iclog(iclog,state) 201#define xlog_trace_iclog(iclog,state)
202
175#endif /* XFS_LOG_TRACE */ 203#endif /* XFS_LOG_TRACE */
176 204
177 205
@@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp)
1009 * layer, it means the underlyin device no longer supports 1037 * layer, it means the underlyin device no longer supports
1010 * barrier I/O. Warn loudly and turn off barriers. 1038 * barrier I/O. Warn loudly and turn off barriers.
1011 */ 1039 */
1012 if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { 1040 if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
1013 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; 1041 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
1014 xfs_fs_cmn_err(CE_WARN, l->l_mp, 1042 xfs_fs_cmn_err(CE_WARN, l->l_mp,
1015 "xlog_iodone: Barriers are no longer supported" 1043 "xlog_iodone: Barriers are no longer supported"
@@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1231 spin_lock_init(&log->l_grant_lock); 1259 spin_lock_init(&log->l_grant_lock);
1232 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1260 sv_init(&log->l_flush_wait, 0, "flush_wait");
1233 1261
1262 xlog_trace_loggrant_alloc(log);
1234 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1263 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1235 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1264 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
1236 1265
@@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1314 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1315 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1287 1316
1317 xlog_trace_iclog_alloc(iclog);
1318
1288 iclogp = &iclog->ic_next; 1319 iclogp = &iclog->ic_next;
1289 } 1320 }
1290 *iclogp = log->l_iclog; /* complete ring */ 1321 *iclogp = log->l_iclog; /* complete ring */
@@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log)
1565 sv_destroy(&iclog->ic_force_wait); 1596 sv_destroy(&iclog->ic_force_wait);
1566 sv_destroy(&iclog->ic_write_wait); 1597 sv_destroy(&iclog->ic_write_wait);
1567 xfs_buf_free(iclog->ic_bp); 1598 xfs_buf_free(iclog->ic_bp);
1568#ifdef XFS_LOG_TRACE 1599 xlog_trace_iclog_dealloc(iclog);
1569 if (iclog->ic_trace != NULL) {
1570 ktrace_free(iclog->ic_trace);
1571 }
1572#endif
1573 next_iclog = iclog->ic_next; 1600 next_iclog = iclog->ic_next;
1574 kmem_free(iclog); 1601 kmem_free(iclog);
1575 iclog = next_iclog; 1602 iclog = next_iclog;
@@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log)
1578 spinlock_destroy(&log->l_grant_lock); 1605 spinlock_destroy(&log->l_grant_lock);
1579 1606
1580 xfs_buf_free(log->l_xbuf); 1607 xfs_buf_free(log->l_xbuf);
1581#ifdef XFS_LOG_TRACE 1608 xlog_trace_loggrant_dealloc(log);
1582 if (log->l_trace != NULL) {
1583 ktrace_free(log->l_trace);
1584 }
1585 if (log->l_grant_trace != NULL) {
1586 ktrace_free(log->l_grant_trace);
1587 }
1588#endif
1589 log->l_mp->m_log = NULL; 1609 log->l_mp->m_log = NULL;
1590 kmem_free(log); 1610 kmem_free(log);
1591} /* xlog_dealloc_log */ 1611} /* xlog_dealloc_log */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c8a5b22ee3e3..e7d8f84443fa 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -448,7 +448,6 @@ typedef struct log {
448 int l_grant_write_bytes; 448 int l_grant_write_bytes;
449 449
450#ifdef XFS_LOG_TRACE 450#ifdef XFS_LOG_TRACE
451 struct ktrace *l_trace;
452 struct ktrace *l_grant_trace; 451 struct ktrace *l_grant_trace;
453#endif 452#endif
454 453
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index aa238c8fbd7a..8b6812f66a15 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1838,6 +1838,12 @@ again:
1838#endif 1838#endif
1839} 1839}
1840 1840
1841/*
1842 * xfs_lock_two_inodes() can only be used to lock one type of lock
1843 * at a time - the iolock or the ilock, but not both at once. If
1844 * we lock both at once, lockdep will report false positives saying
1845 * we have violated locking orders.
1846 */
1841void 1847void
1842xfs_lock_two_inodes( 1848xfs_lock_two_inodes(
1843 xfs_inode_t *ip0, 1849 xfs_inode_t *ip0,
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes(
1848 int attempts = 0; 1854 int attempts = 0;
1849 xfs_log_item_t *lp; 1855 xfs_log_item_t *lp;
1850 1856
1857 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1858 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
1851 ASSERT(ip0->i_ino != ip1->i_ino); 1859 ASSERT(ip0->i_ino != ip1->i_ino);
1852 1860
1853 if (ip0->i_ino > ip1->i_ino) { 1861 if (ip0->i_ino > ip1->i_ino) {
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */
3152/* 3160/*
3153 * Zero file bytes between startoff and endoff inclusive. 3161 * Zero file bytes between startoff and endoff inclusive.
3154 * The iolock is held exclusive and no blocks are buffered. 3162 * The iolock is held exclusive and no blocks are buffered.
3163 *
3164 * This function is used by xfs_free_file_space() to zero
3165 * partial blocks when the range to free is not block aligned.
3166 * When unreserving space with boundaries that are not block
3167 * aligned we round up the start and round down the end
3168 * boundaries and then use this function to zero the parts of
3169 * the blocks that got dropped during the rounding.
3155 */ 3170 */
3156STATIC int 3171STATIC int
3157xfs_zero_remaining_bytes( 3172xfs_zero_remaining_bytes(
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes(
3168 int nimap; 3183 int nimap;
3169 int error = 0; 3184 int error = 0;
3170 3185
3186 /*
3187 * Avoid doing I/O beyond eof - it's not necessary
3188 * since nothing can read beyond eof. The space will
3189 * be zeroed when the file is extended anyway.
3190 */
3191 if (startoff >= ip->i_size)
3192 return 0;
3193
3194 if (endoff > ip->i_size)
3195 endoff = ip->i_size;
3196
3171 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 3197 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
3172 XFS_IS_REALTIME_INODE(ip) ? 3198 XFS_IS_REALTIME_INODE(ip) ?
3173 mp->m_rtdev_targp : mp->m_ddev_targp); 3199 mp->m_rtdev_targp : mp->m_ddev_targp);