Diffstat (limited to 'fs')
-rw-r--r--  fs/anon_inodes.c                      |   15
-rw-r--r--  fs/binfmt_elf.c                       |    8
-rw-r--r--  fs/binfmt_elf_fdpic.c                 |    8
-rw-r--r--  fs/eventpoll.c                        |   21
-rw-r--r--  fs/ext2/dir.c                         |    5
-rw-r--r--  fs/ext2/ext2.h                        |    2
-rw-r--r--  fs/ext2/namei.c                       |    5
-rw-r--r--  fs/ext3/inode.c                       |   22
-rw-r--r--  fs/ext3/resize.c                      |    2
-rw-r--r--  fs/ext3/super.c                       |    2
-rw-r--r--  fs/ext4/Makefile                      |    2
-rw-r--r--  fs/ext4/ext4.h                        |   39
-rw-r--r--  fs/ext4/ext4_extents.h                |    4
-rw-r--r--  fs/ext4/extents.c                     |    4
-rw-r--r--  fs/ext4/file.c                        |   36
-rw-r--r--  fs/ext4/fsync.c                       |    8
-rw-r--r--  fs/ext4/ialloc.c                      |   48
-rw-r--r--  fs/ext4/inode.c                       |  281
-rw-r--r--  fs/ext4/ioctl.c                       |   36
-rw-r--r--  fs/ext4/mballoc.c                     |   85
-rw-r--r--  fs/ext4/mballoc.h                     |    1
-rw-r--r--  fs/ext4/migrate.c                     |    8
-rw-r--r--  fs/ext4/move_extent.c                 | 1320
-rw-r--r--  fs/ext4/namei.c                       |   10
-rw-r--r--  fs/ext4/resize.c                      |    2
-rw-r--r--  fs/ext4/super.c                       |   30
-rw-r--r--  fs/fat/inode.c                        |    2
-rw-r--r--  fs/gfs2/Kconfig                       |    2
-rw-r--r--  fs/isofs/dir.c                        |    5
-rw-r--r--  fs/isofs/inode.c                      |  118
-rw-r--r--  fs/isofs/isofs.h                      |   27
-rw-r--r--  fs/isofs/namei.c                      |    4
-rw-r--r--  fs/jbd/transaction.c                  |   48
-rw-r--r--  fs/jbd2/checkpoint.c                  |    5
-rw-r--r--  fs/jbd2/commit.c                      |   13
-rw-r--r--  fs/jbd2/journal.c                     |   69
-rw-r--r--  fs/jbd2/transaction.c                 |   49
-rw-r--r--  fs/notify/inotify/inotify.h           |    3
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c  |    2
-rw-r--r--  fs/notify/inotify/inotify_user.c      |   32
-rw-r--r--  fs/ocfs2/super.c                      |    2
-rw-r--r--  fs/proc/Makefile                      |    1
-rw-r--r--  fs/proc/proc_devtree.c                |   10
-rw-r--r--  fs/proc/softirqs.c                    |   44
-rw-r--r--  fs/proc/stat.c                        |   15
-rw-r--r--  fs/proc/vmcore.c                      |    7
-rw-r--r--  fs/reiserfs/do_balan.c                |    5
-rw-r--r--  fs/reiserfs/lbalance.c                |   10
-rw-r--r--  fs/seq_file.c                         |   20
-rw-r--r--  fs/ufs/inode.c                        |   10
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h          |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c          |    2
52 files changed, 1977 insertions, 534 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 1dd96d4406c0..47d4a01c5393 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_delete = anon_inodefs_delete_dentry,
 };
 
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+	return 0;
+};
+
+static const struct address_space_operations anon_aops = {
+	.set_page_dirty = anon_set_page_dirty,
+};
+
 /**
  * anon_inode_getfd - creates a new file instance by hooking it up to an
  *                    anonymous inode, and a dentry that describe the "class"
@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void)
 
 	inode->i_fop = &anon_inode_fops;
 
+	inode->i_mapping->a_ops = &anon_aops;
+
 	/*
 	 * Mark the inode dirty from the very beginning,
 	 * that way it will never be moved to the dirty
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 40381df34869..9fa212b014a5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1340,8 +1340,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
+	rcu_read_lock();
+	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	prstatus->pr_pid = task_pid_vnr(p);
-	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
@@ -1382,8 +1384,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
+	rcu_read_lock();
+	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	psinfo->pr_pid = task_pid_vnr(p);
-	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdb66faa24f1..20fbeced472b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1387,8 +1387,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
+	rcu_read_lock();
+	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	prstatus->pr_pid = task_pid_vnr(p);
-	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
@@ -1432,8 +1434,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
+	rcu_read_lock();
+	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	psinfo->pr_pid = task_pid_vnr(p);
-	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5458e80fc558..085c5c063420 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -98,7 +98,7 @@ struct epoll_filefd {
 struct nested_call_node {
 	struct list_head llink;
 	void *cookie;
-	int cpu;
+	void *ctx;
 };
 
 /*
@@ -317,17 +317,17 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
  * @nproc: Nested call core function pointer.
  * @priv: Opaque data to be passed to the @nproc callback.
  * @cookie: Cookie to be used to identify this nested call.
+ * @ctx: This instance context.
  *
  * Returns: Returns the code returned by the @nproc callback, or -1 if
  *          the maximum recursion limit has been exceeded.
  */
 static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 			  int (*nproc)(void *, void *, int), void *priv,
-			  void *cookie)
+			  void *cookie, void *ctx)
 {
 	int error, call_nests = 0;
 	unsigned long flags;
-	int this_cpu = get_cpu();
 	struct list_head *lsthead = &ncalls->tasks_call_list;
 	struct nested_call_node *tncur;
 	struct nested_call_node tnode;
@@ -340,7 +340,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	 * very much limited.
 	 */
 	list_for_each_entry(tncur, lsthead, llink) {
-		if (tncur->cpu == this_cpu &&
+		if (tncur->ctx == ctx &&
 		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
 			/*
 			 * Ops ... loop detected or maximum nest level reached.
@@ -352,7 +352,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	}
 
 	/* Add the current task and cookie to the list */
-	tnode.cpu = this_cpu;
+	tnode.ctx = ctx;
 	tnode.cookie = cookie;
 	list_add(&tnode.llink, lsthead);
 
@@ -364,10 +364,9 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	/* Remove the current task from the list */
 	spin_lock_irqsave(&ncalls->lock, flags);
 	list_del(&tnode.llink);
- out_unlock:
+out_unlock:
 	spin_unlock_irqrestore(&ncalls->lock, flags);
 
-	put_cpu();
 	return error;
 }
 
@@ -408,8 +407,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
  */
 static void ep_poll_safewake(wait_queue_head_t *wq)
 {
+	int this_cpu = get_cpu();
+
 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
-		       ep_poll_wakeup_proc, NULL, wq);
+		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+
+	put_cpu();
 }
 
 /*
@@ -663,7 +666,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	 * could re-enter here.
 	 */
 	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-				   ep_poll_readyevents_proc, ep, ep);
+				   ep_poll_readyevents_proc, ep, ep, current);
 
 	return pollflags != -1 ? pollflags : 0;
 }
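
A note on the eventpoll hunks above: keying the nesting check on an opaque ctx pointer (a CPU id cast to a pointer in ep_poll_safewake(), current in ep_eventpoll_poll()) lets the same helper detect loops and excessive depth per caller rather than per raw CPU number. The stand-alone C sketch below only illustrates that pattern; every name in it is illustrative rather than kernel API, it is single-threaded, and it omits the spinlock the kernel takes around the list.

#include <stddef.h>

#define MAX_NESTS 4

struct call_node {
	struct call_node *next;
	void *cookie;		/* object currently being visited */
	void *ctx;		/* caller identity: task, CPU id, ... */
};

static struct call_node *call_list;

static int call_nested(int (*proc)(void *priv), void *priv,
		       void *cookie, void *ctx)
{
	struct call_node node, *cur;
	int nests = 0, ret;

	/*
	 * Refuse the call if this context has already visited this cookie
	 * (a loop) or exceeded the nesting limit; frames pushed by other
	 * contexts are ignored, which is the point of keying on ctx.
	 */
	for (cur = call_list; cur; cur = cur->next)
		if (cur->ctx == ctx &&
		    (cur->cookie == cookie || ++nests > MAX_NESTS))
			return -1;

	node.cookie = cookie;
	node.ctx = ctx;
	node.next = call_list;
	call_list = &node;	/* push our frame */

	ret = proc(priv);

	call_list = node.next;	/* pop our frame */
	return ret;
}

A caller would invoke call_nested(visit, obj, obj, caller_id) and treat -1 as "loop detected or nesting limit hit", mirroring how ep_call_nested() is used above.
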
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 003500498c22..6cde970b0a1a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -450,7 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 
 /* Releases the page */
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-		   struct page *page, struct inode *inode)
+		   struct page *page, struct inode *inode, int update_times)
 {
 	loff_t pos = page_offset(page) +
 			(char *) de - (char *) page_address(page);
@@ -465,7 +465,8 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 	ext2_set_de_type(de, inode);
 	err = ext2_commit_chunk(page, pos, len);
 	ext2_put_page(page);
-	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	if (update_times)
+		dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(dir);
 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f2e5811936d0..d988a718aedb 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -111,7 +111,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
-extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
+extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
 
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 90ea17998a73..6524ecaebb7a 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -320,7 +320,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 		if (!new_de)
 			goto out_dir;
 		inode_inc_link_count(old_inode);
-		ext2_set_link(new_dir, new_de, new_page, old_inode);
+		ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			drop_nlink(new_inode);
@@ -352,7 +352,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
-		ext2_set_link(old_inode, dir_de, dir_page, new_dir);
+		if (old_dir != new_dir)
+			ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
 		inode_dec_link_count(old_dir);
 	}
 	return 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b0248c6d5d4c..05dea8132fc0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -820,7 +820,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 		while (count < maxblocks && count <= blocks_to_boundary) {
 			ext3_fsblk_t blk;
 
-			if (!verify_chain(chain, partial)) {
+			if (!verify_chain(chain, chain + depth - 1)) {
 				/*
 				 * Indirect block might be removed by
 				 * truncate while we were reading it.
@@ -2374,7 +2374,7 @@ void ext3_truncate(struct inode *inode)
 	struct page *page;
 
 	if (!ext3_can_truncate(inode))
-		return;
+		goto out_notrans;
 
 	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
 		ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
@@ -2390,7 +2390,7 @@ void ext3_truncate(struct inode *inode)
 		page = grab_cache_page(mapping,
 				inode->i_size >> PAGE_CACHE_SHIFT);
 		if (!page)
-			return;
+			goto out_notrans;
 	}
 
 	handle = start_transaction(inode);
@@ -2401,7 +2401,7 @@ void ext3_truncate(struct inode *inode)
 			unlock_page(page);
 			page_cache_release(page);
 		}
-		return;		/* AKPM: return what? */
+		goto out_notrans;
 	}
 
 	last_block = (inode->i_size + blocksize-1)
@@ -2525,6 +2525,14 @@ out_stop:
 		ext3_orphan_del(handle, inode);
 
 	ext3_journal_stop(handle);
+	return;
+out_notrans:
+	/*
+	 * Delete the inode from orphan list so that it doesn't stay there
+	 * forever and trigger assertion on umount.
+	 */
+	if (inode->i_nlink)
+		ext3_orphan_del(NULL, inode);
 }
 
 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -3122,12 +3130,6 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 	rc = inode_setattr(inode, attr);
 
-	/* If inode_setattr's call to ext3_truncate failed to get a
-	 * transaction handle at all, we need to clean up the in-core
-	 * orphan list manually. */
-	if (inode->i_nlink)
-		ext3_orphan_del(NULL, inode);
-
 	if (!rc && (ia_valid & ATTR_MODE))
 		rc = ext3_acl_chmod(inode);
 
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8a0b26340b54..8359e7b3dc89 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -990,7 +990,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext3_warning(sb, __func__,
-				     "CONFIG_LBD not enabled\n");
+				     "CONFIG_LBDAF not enabled\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 26aa64dee6aa..601e881e6105 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1812,7 +1812,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
 			" too large to mount safely\n", sb->s_id);
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
+			printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
 				"enabled\n");
 		goto failed_mount;
 	}
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8a34710ecf40..8867b2a1e5fe 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7d5edc38c9..17b9998680e3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -352,6 +352,7 @@ struct ext4_new_group_data {
  /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
  /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
 #define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
+#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
 
 /*
  * ioctl commands in 32 bit emulation
@@ -447,6 +448,15 @@ struct ext4_inode {
 	__le32  i_version_hi;	/* high 32 bits for 64-bit version */
 };
 
+struct move_extent {
+	__u32 reserved;		/* should be zero */
+	__u32 donor_fd;		/* donor file descriptor */
+	__u64 orig_start;	/* logical start offset in block for orig */
+	__u64 donor_start;	/* logical start offset in block for donor */
+	__u64 len;		/* block length to be moved */
+	__u64 moved_len;	/* moved block length */
+};
+#define MAX_DEFRAG_SIZE		((1UL<<31) - 1)
 
 #define EXT4_EPOCH_BITS 2
 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@@ -674,7 +684,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
 #define EXT4_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
-#define EXT4_MOUNT_ABORT		0x00200	/* Fatal error detected */
 #define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
 #define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
@@ -696,17 +705,10 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
-/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
-#ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
 #define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
 #define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
 					 EXT4_MOUNT_##opt)
-#else
-#define EXT2_MOUNT_NOLOAD		EXT4_MOUNT_NOLOAD
-#define EXT2_MOUNT_ABORT		EXT4_MOUNT_ABORT
-#define EXT2_MOUNT_DATA_FLAGS		EXT4_MOUNT_DATA_FLAGS
-#endif
 
 #define ext4_set_bit			ext2_set_bit
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
@@ -824,6 +826,13 @@ struct ext4_super_block {
 };
 
 #ifdef __KERNEL__
+
+/*
+ * run-time mount flags
+ */
+#define EXT4_MF_MNTDIR_SAMPLED	0x0001
+#define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -842,7 +851,8 @@ struct ext4_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
-	unsigned long s_mount_opt;
+	unsigned int s_mount_opt;
+	unsigned int s_mount_flags;
 	ext4_fsblk_t s_sb_block;
 	uid_t s_resuid;
 	gid_t s_resgid;
@@ -853,6 +863,7 @@ struct ext4_sb_info {
 	int s_inode_size;
 	int s_first_ino;
 	unsigned int s_inode_readahead_blks;
+	unsigned int s_inode_goal;
 	spinlock_t s_next_gen_lock;
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 			  dx_hash_info *hinfo);
 
 /* ialloc.c */
-extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
+				    const struct qstr *qstr, __u32 goal);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *);
 extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
-		unsigned long, unsigned long, int, unsigned long *);
+		ext4_fsblk_t, unsigned long, int, unsigned long *);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
+/* move_extent.c */
+extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
+			     __u64 start_orig, __u64 start_donor,
+			     __u64 len, __u64 *moved_len);
+
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
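
For context on the move_extent structure and the EXT4_IOC_MOVE_EXT ioctl introduced above: an online defragmenter issues this ioctl on the original file, handing it a donor file whose (ideally contiguous) blocks get swapped into place. The sketch below is a minimal, hypothetical user-space driver built only from the definitions in this header change; the open modes, error handling, and command-line handling are illustrative and are not taken from e4defrag.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Mirrors the struct and ioctl number added to fs/ext4/ext4.h above. */
struct move_extent {
	__u32 reserved;		/* should be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start offset in block for orig */
	__u64 donor_start;	/* logical start offset in block for donor */
	__u64 len;		/* block length to be moved */
	__u64 moved_len;	/* moved block length */
};

#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(int argc, char **argv)
{
	struct move_extent me = { 0 };
	int orig_fd, donor_fd;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <orig> <donor> <blocks>\n", argv[0]);
		return 1;
	}
	orig_fd = open(argv[1], O_RDWR);	/* open modes are illustrative */
	donor_fd = open(argv[2], O_RDWR);
	if (orig_fd < 0 || donor_fd < 0) {
		perror("open");
		return 1;
	}
	me.donor_fd = donor_fd;		/* donor supplies the replacement blocks */
	me.orig_start = 0;
	me.donor_start = 0;
	me.len = strtoull(argv[3], NULL, 0);

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
	close(orig_fd);
	close(donor_fd);
	return 0;
}

The kernel reports how much was actually exchanged back through moved_len, which may be short of the requested len; both files must live on the same ext4 filesystem and be extent-mapped for ext4_move_extents() to accept the request.
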
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index f0c3ec85bd48..20a84105a10b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 }
 
 extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
 						   int num,
 						   struct ext4_ext_path *path);
+extern int ext4_can_extents_be_merged(struct inode *inode,
+				      struct ext4_extent *ex1,
+				      struct ext4_extent *ex2);
 extern int ext4_ext_try_to_merge(struct inode *inode,
 				 struct ext4_ext_path *path,
 				 struct ext4_extent *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2593f748c3a4..50322a09bd01 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -49,7 +49,7 @@
  * ext_pblock:
  * combine low and high parts of physical block number into ext4_fsblk_t
  */
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int
+int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 			   struct ext4_extent *ex2)
 {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 588af8c77246..3f1873fef1c6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,8 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
+#include <linux/mount.h>
+#include <linux/path.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int ext4_file_open(struct inode * inode, struct file * filp)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct vfsmount *mnt = filp->f_path.mnt;
+	struct path path;
+	char buf[64], *cp;
+
+	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
+		     !(sb->s_flags & MS_RDONLY))) {
+		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+		/*
+		 * Sample where the filesystem has been mounted and
+		 * store it in the superblock for sysadmin convenience
+		 * when trying to sort through large numbers of block
+		 * devices or filesystem images.
+		 */
+		memset(buf, 0, sizeof(buf));
+		path.mnt = mnt->mnt_parent;
+		path.dentry = mnt->mnt_mountpoint;
+		path_get(&path);
+		cp = d_path(&path, buf, sizeof(buf));
+		path_put(&path);
+		if (!IS_ERR(cp)) {
+			memcpy(sbi->s_es->s_last_mounted, cp,
+			       sizeof(sbi->s_es->s_last_mounted));
+			sb->s_dirt = 1;
+		}
+	}
+	return generic_file_open(inode, filp);
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.mmap		= ext4_file_mmap,
-	.open		= generic_file_open,
+	.open		= ext4_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840b..83cf6415f599 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-		   inode->i_sb->s_id, datasync, inode->i_ino,
-		   dentry->d_parent->d_inode->i_ino);
+	trace_ext4_sync_file(file, dentry, datasync);
 
 	/*
 	 * data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bce..2f645732e3b7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <asm/byteorder.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
  */
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
 	ino = inode->i_ino;
 	ext4_debug("freeing inode %lu\n", ino);
-	trace_mark(ext4_free_inode,
-		   "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
-		   sb->s_id, inode->i_ino, inode->i_mode,
-		   (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
-		   (unsigned long long) inode->i_blocks);
+	trace_ext4_free_inode(inode);
 
 	/*
 	 * Note: we must free any quota before locking the superblock,
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
  */
 
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
-			    ext4_group_t *group, int mode)
+			    ext4_group_t *group, int mode,
+			    const struct qstr *qstr)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
+	struct dx_hash_info hinfo;
 
 	ngroups = real_ngroups;
 	if (flex_size > 1) {
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		int best_ndir = inodes_per_group;
 		int ret = -1;
 
-		get_random_bytes(&grp, sizeof(grp));
+		if (qstr) {
+			hinfo.hash_version = DX_HASH_HALF_MD4;
+			hinfo.seed = sbi->s_hash_seed;
+			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
+			grp = hinfo.hash;
+		} else
+			get_random_bytes(&grp, sizeof(grp));
 		parent_group = (unsigned)grp % ngroups;
 		for (i = 0; i < ngroups; i++) {
 			g = (parent_group + i) % ngroups;
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode);
+		return find_group_orlov(sb, parent, group, mode, 0);
 	}
 
 	/*
@@ -791,7 +798,8 @@ err_ret:
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+			     const struct qstr *qstr, __u32 goal)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 
 	sb = dir->i_sb;
 	ngroups = ext4_get_groups_count(sb);
-	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
-		   dir->i_ino, mode);
+	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
 	sbi = EXT4_SB(sb);
 
+	if (!goal)
+		goal = sbi->s_inode_goal;
+
+	if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
+		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
+		ret2 = 0;
+		goto got_group;
+	}
+
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
 		if (ret2 == -1) {
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		if (test_opt(sb, OLDALLOC))
 			ret2 = find_group_dir(sb, dir, &group);
 		else
-			ret2 = find_group_orlov(sb, dir, &group, mode);
+			ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
 	} else
 		ret2 = find_group_other(sb, dir, &group, mode);
 
@@ -851,7 +868,7 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < ngroups; i++) {
+	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -863,8 +880,6 @@ got_group:
 		if (!inode_bitmap_bh)
 			goto fail;
 
-		ino = 0;
-
 repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
@@ -1047,8 +1062,7 @@ got:
 	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
-	trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
-		   sb->s_id, inode->i_ino, dir->i_ino, mode);
+	trace_ext4_allocate_inode(inode, dir, mode);
 	goto really_out;
 fail:
 	ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22f..7c17ae275af4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
  * If the handle isn't valid we're not journaling so there's nothing to do.
  */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	BUFFER_TRACE(bh, "enter");
 
 	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-		  "data mode %lx\n",
+		  "data mode %x\n",
 		  bh, is_metadata, inode->i_mode,
 		  test_opt(inode->i_sb, DATA_FLAGS));
 
@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
  */
 
 static int ext4_block_to_path(struct inode *inode,
 			      ext4_lblk_t i_block,
 			      ext4_lblk_t offsets[4], int *boundary)
 {
 	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode,
 		final = ptrs;
 	} else {
 		ext4_warning(inode->i_sb, "ext4_block_to_path",
 			     "block %lu > max in inode %lu",
 			     i_block + direct_blocks +
 			     indirect_blocks + double_blocks, inode->i_ino);
 	}
 	if (boundary)
 		*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 
 	while (bref < p+max) {
 		blk = le32_to_cpu(*bref++);
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
 				   "invalid block reference %u "
 				   "in inode #%lu", blk, inode->i_ino);
 			return -EIO;
 		}
 	}
 	return 0;
 }
 
 
 #define ext4_check_indirect_blockref(inode, bh) \
 	__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
 			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
 
 #define ext4_check_inode_blockref(inode) \
 	__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
 			      EXT4_NDIR_BLOCKS)
 
 /**
@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 		bh = sb_getblk(sb, le32_to_cpu(p->key));
 		if (unlikely(!bh))
 			goto failure;
 
 		if (!bh_uptodate_or_lock(bh)) {
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 				goto failure;
 			}
 		}
 
 		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
 		/* Reader: end */
 		if (!p->key)
@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  * returns it.
  */
 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
 				   Indirect *partial)
 {
 	/*
 	 * XXX need to get goal block from mballoc's data structures
@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
  * direct and indirect blocks.
  */
 static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
 				 int blocks_to_boundary)
 {
 	unsigned int count = 0;
 
@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  * direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, ext4_fsblk_t goal,
 			     int indirect_blks, int blks,
 			     ext4_fsblk_t new_blocks[4], int *err)
 {
 	struct ext4_allocation_request ar;
 	int target, i;
@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	}
 	if (!*err) {
 		if (target == blks) {
 			/*
 			 * save the new block number
 			 * for the first direct block
 			 */
 			new_blocks[index] = current_block;
 		}
 		blk_allocated += ar.len;
@@ -728,9 +731,9 @@ failed_out:
  * as described above and return 0.
 */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, int indirect_blks,
 			     int *blks, ext4_fsblk_t goal,
 			     ext4_lblk_t *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
 	int i, n = 0;
@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			 * the chain to point to the new allocated
 			 * data blocks numbers
 			 */
-			for (i=1; i < num; i++)
+			for (i = 1; i < num; i++)
 				*(branch[n].p + i) = cpu_to_le32(++current_block);
 		}
 		BUFFER_TRACE(bh, "marking uptodate");
@@ -820,7 +823,8 @@ failed:
  * chain to new block and return 0.
  */
 static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num, int blks)
+			      ext4_lblk_t block, Indirect *where, int num,
+			      int blks)
 {
 	int i;
 	int err = 0;
@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
-
-	inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
 		/*
@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	} else {
 		/*
 		 * OK, we spliced it into the inode itself on a direct block.
-		 * Inode was dirtied above.
 		 */
+		ext4_mark_inode_dirty(handle, inode);
 		jbd_debug(5, "splicing direct\n");
 	}
 	return err;
@@ -921,9 +921,9 @@ err_out:
  * blocks.
 */
 static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int maxblocks,
 			       struct buffer_head *bh_result,
 			       int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 				   &blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 * Block out ext4_truncate while we alter the tree
 	 */
 	err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
 				&count, goal,
 				offsets + (partial - chain), partial);
 
 	/*
 	 * The ext4_splice_branch call will free and forget any buffers
@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 */
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					 partial, indirect_blks, count);
 	else
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
 
 	for (bh = head, block_start = 0;
 	     ret == 0 && (bh != head || !block_start);
-	     block_start = block_end, bh = next)
-	{
+	     block_start = block_end, bh = next) {
 		next = bh->b_this_page;
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle,
  * write.
  */
 static int do_journal_get_write_access(handle_t *handle,
 				       struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle,
 }
 
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
 	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
 
-	trace_mark(ext4_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
 	 * Reserve one block more for addition to orphan list in case
 	 * we allocate blocks but write fails for some reason
 	 */
 	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
@@ -1523,7 +1519,7 @@ retry:
 	ext4_journal_stop(handle);
 	if (pos + len > inode->i_size) {
 		vmtruncate(inode, inode->i_size);
 		/*
 		 * If vmtruncate failed early the inode might
 		 * still be on the orphan list; we need to
 		 * make sure the inode is removed from the
@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 static int ext4_generic_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	int i_size_changed = 0;
 	struct inode *inode = mapping->host;
@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file,
  * buffers are managed internally.
  */
 static int ext4_ordered_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_ordered_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_ordered_write_end(inode, pos, len, copied);
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file,
1636 1629
1637 if (pos + len > inode->i_size) { 1630 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size); 1631 vmtruncate(inode, inode->i_size);
1639 /* 1632 /*
1640 * If vmtruncate failed early the inode might still be 1633 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode 1634 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case. 1635 * is removed from the orphan list in that case.
@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file,
1650} 1643}
1651 1644
1652static int ext4_writeback_write_end(struct file *file, 1645static int ext4_writeback_write_end(struct file *file,
1653 struct address_space *mapping, 1646 struct address_space *mapping,
1654 loff_t pos, unsigned len, unsigned copied, 1647 loff_t pos, unsigned len, unsigned copied,
1655 struct page *page, void *fsdata) 1648 struct page *page, void *fsdata)
1656{ 1649{
1657 handle_t *handle = ext4_journal_current_handle(); 1650 handle_t *handle = ext4_journal_current_handle();
1658 struct inode *inode = mapping->host; 1651 struct inode *inode = mapping->host;
1659 int ret = 0, ret2; 1652 int ret = 0, ret2;
1660 1653
1661 trace_mark(ext4_writeback_write_end, 1654 trace_ext4_writeback_write_end(inode, pos, len, copied);
1662 "dev %s ino %lu pos %llu len %u copied %u",
1663 inode->i_sb->s_id, inode->i_ino,
1664 (unsigned long long) pos, len, copied);
1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1655 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1666 page, fsdata); 1656 page, fsdata);
1667 copied = ret2; 1657 copied = ret2;
@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file,
1681 1671
1682 if (pos + len > inode->i_size) { 1672 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size); 1673 vmtruncate(inode, inode->i_size);
1684 /* 1674 /*
1685 * If vmtruncate failed early the inode might still be 1675 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode 1676 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case. 1677 * is removed from the orphan list in that case.
@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file,
1694} 1684}
1695 1685
1696static int ext4_journalled_write_end(struct file *file, 1686static int ext4_journalled_write_end(struct file *file,
1697 struct address_space *mapping, 1687 struct address_space *mapping,
1698 loff_t pos, unsigned len, unsigned copied, 1688 loff_t pos, unsigned len, unsigned copied,
1699 struct page *page, void *fsdata) 1689 struct page *page, void *fsdata)
1700{ 1690{
1701 handle_t *handle = ext4_journal_current_handle(); 1691 handle_t *handle = ext4_journal_current_handle();
1702 struct inode *inode = mapping->host; 1692 struct inode *inode = mapping->host;
@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
1705 unsigned from, to; 1695 unsigned from, to;
1706 loff_t new_i_size; 1696 loff_t new_i_size;
1707 1697
1708 trace_mark(ext4_journalled_write_end, 1698 trace_ext4_journalled_write_end(inode, pos, len, copied);
1709 "dev %s ino %lu pos %llu len %u copied %u",
1710 inode->i_sb->s_id, inode->i_ino,
1711 (unsigned long long) pos, len, copied);
1712 from = pos & (PAGE_CACHE_SIZE - 1); 1699 from = pos & (PAGE_CACHE_SIZE - 1);
1713 to = from + len; 1700 to = from + len;
1714 1701
@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file,
1747 ret = ret2; 1734 ret = ret2;
1748 if (pos + len > inode->i_size) { 1735 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size); 1736 vmtruncate(inode, inode->i_size);
1750 /* 1737 /*
1751 * If vmtruncate failed early the inode might still be 1738 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode 1739 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case. 1740 * is removed from the orphan list in that case.
@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1854} 1841}
1855 1842
1856static void ext4_da_page_release_reservation(struct page *page, 1843static void ext4_da_page_release_reservation(struct page *page,
1857 unsigned long offset) 1844 unsigned long offset)
1858{ 1845{
1859 int to_release = 0; 1846 int to_release = 0;
1860 struct buffer_head *head, *bh; 1847 struct buffer_head *head, *bh;
@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
2554 struct buffer_head *page_bufs; 2541 struct buffer_head *page_bufs;
2555 struct inode *inode = page->mapping->host; 2542 struct inode *inode = page->mapping->host;
2556 2543
2557 trace_mark(ext4_da_writepage, 2544 trace_ext4_da_writepage(inode, page);
2558 "dev %s ino %lu page_index %lu",
2559 inode->i_sb->s_id, inode->i_ino, page->index);
2560 size = i_size_read(inode); 2545 size = i_size_read(inode);
2561 if (page->index == size >> PAGE_CACHE_SHIFT) 2546 if (page->index == size >> PAGE_CACHE_SHIFT)
2562 len = size & ~PAGE_CACHE_MASK; 2547 len = size & ~PAGE_CACHE_MASK;
@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2667 int needed_blocks, ret = 0, nr_to_writebump = 0; 2652 int needed_blocks, ret = 0, nr_to_writebump = 0;
2668 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2653 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2669 2654
2670 trace_mark(ext4_da_writepages, 2655 trace_ext4_da_writepages(inode, wbc);
2671 "dev %s ino %lu nr_t_write %ld "
2672 "pages_skipped %ld range_start %llu "
2673 "range_end %llu nonblocking %d "
2674 "for_kupdate %d for_reclaim %d "
2675 "for_writepages %d range_cyclic %d",
2676 inode->i_sb->s_id, inode->i_ino,
2677 wbc->nr_to_write, wbc->pages_skipped,
2678 (unsigned long long) wbc->range_start,
2679 (unsigned long long) wbc->range_end,
2680 wbc->nonblocking, wbc->for_kupdate,
2681 wbc->for_reclaim, wbc->for_writepages,
2682 wbc->range_cyclic);
2683 2656
2684 /* 2657 /*
2685 * No pages to write? This is mainly a kludge to avoid starting 2658 * No pages to write? This is mainly a kludge to avoid starting
@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2693 * If the filesystem has aborted, it is read-only, so return 2666 * If the filesystem has aborted, it is read-only, so return
2694 * right away instead of dumping stack traces later on that 2667 * right away instead of dumping stack traces later on that
2695 * will obscure the real source of the problem. We test 2668 * will obscure the real source of the problem. We test
2696 * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because 2669 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
2697 * the latter could be true if the filesystem is mounted 2670 * the latter could be true if the filesystem is mounted
2698 * read-only, and in that case, ext4_da_writepages should 2671 * read-only, and in that case, ext4_da_writepages should
2699 * *never* be called, so if that ever happens, we would want 2672 * *never* be called, so if that ever happens, we would want
2700 * the stack trace. 2673 * the stack trace.
2701 */ 2674 */
2702 if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) 2675 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2703 return -EROFS; 2676 return -EROFS;
2704 2677
2705 /* 2678 /*
@@ -2845,14 +2818,7 @@ out_writepages:
2845 if (!no_nrwrite_index_update) 2818 if (!no_nrwrite_index_update)
2846 wbc->no_nrwrite_index_update = 0; 2819 wbc->no_nrwrite_index_update = 0;
2847 wbc->nr_to_write -= nr_to_writebump; 2820 wbc->nr_to_write -= nr_to_writebump;
2848 trace_mark(ext4_da_writepage_result, 2821 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2849 "dev %s ino %lu ret %d pages_written %d "
2850 "pages_skipped %ld congestion %d "
2851 "more_io %d no_nrwrite_index_update %d",
2852 inode->i_sb->s_id, inode->i_ino, ret,
2853 pages_written, wbc->pages_skipped,
2854 wbc->encountered_congestion, wbc->more_io,
2855 wbc->no_nrwrite_index_update);
2856 return ret; 2822 return ret;
2857} 2823}
2858 2824
@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2884} 2850}
2885 2851
2886static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2852static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2887 loff_t pos, unsigned len, unsigned flags, 2853 loff_t pos, unsigned len, unsigned flags,
2888 struct page **pagep, void **fsdata) 2854 struct page **pagep, void **fsdata)
2889{ 2855{
2890 int ret, retries = 0; 2856 int ret, retries = 0;
2891 struct page *page; 2857 struct page *page;
@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2904 len, flags, pagep, fsdata); 2870 len, flags, pagep, fsdata);
2905 } 2871 }
2906 *fsdata = (void *)0; 2872 *fsdata = (void *)0;
2907 2873 trace_ext4_da_write_begin(inode, pos, len, flags);
2908 trace_mark(ext4_da_write_begin,
2909 "dev %s ino %lu pos %llu len %u flags %u",
2910 inode->i_sb->s_id, inode->i_ino,
2911 (unsigned long long) pos, len, flags);
2912retry: 2874retry:
2913 /* 2875 /*
2914 * With delayed allocation, we don't log the i_disksize update 2876 * With delayed allocation, we don't log the i_disksize update
@@ -2959,7 +2921,7 @@ out:
2959 * when write to the end of file but not require block allocation 2921 * when write to the end of file but not require block allocation
2960 */ 2922 */
2961static int ext4_da_should_update_i_disksize(struct page *page, 2923static int ext4_da_should_update_i_disksize(struct page *page,
2962 unsigned long offset) 2924 unsigned long offset)
2963{ 2925{
2964 struct buffer_head *bh; 2926 struct buffer_head *bh;
2965 struct inode *inode = page->mapping->host; 2927 struct inode *inode = page->mapping->host;
@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2978} 2940}
2979 2941
2980static int ext4_da_write_end(struct file *file, 2942static int ext4_da_write_end(struct file *file,
2981 struct address_space *mapping, 2943 struct address_space *mapping,
2982 loff_t pos, unsigned len, unsigned copied, 2944 loff_t pos, unsigned len, unsigned copied,
2983 struct page *page, void *fsdata) 2945 struct page *page, void *fsdata)
2984{ 2946{
2985 struct inode *inode = mapping->host; 2947 struct inode *inode = mapping->host;
2986 int ret = 0, ret2; 2948 int ret = 0, ret2;
@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
3001 } 2963 }
3002 } 2964 }
3003 2965
3004 trace_mark(ext4_da_write_end, 2966 trace_ext4_da_write_end(inode, pos, len, copied);
3005 "dev %s ino %lu pos %llu len %u copied %u",
3006 inode->i_sb->s_id, inode->i_ino,
3007 (unsigned long long) pos, len, copied);
3008 start = pos & (PAGE_CACHE_SIZE - 1); 2967 start = pos & (PAGE_CACHE_SIZE - 1);
3009 end = start + copied - 1; 2968 end = start + copied - 1;
3010 2969
@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3081 * not strictly speaking necessary (and for users of 3040 * not strictly speaking necessary (and for users of
3082 * laptop_mode, not even desirable). However, to do otherwise 3041 * laptop_mode, not even desirable). However, to do otherwise
3083 * would require replicating code paths in: 3042 * would require replicating code paths in:
3084 * 3043 *
3085 * ext4_da_writepages() -> 3044 * ext4_da_writepages() ->
3086 * write_cache_pages() ---> (via passed in callback function) 3045 * write_cache_pages() ---> (via passed in callback function)
3087 * __mpage_da_writepage() --> 3046 * __mpage_da_writepage() -->
@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3101 * write out the pages, but rather only collect contiguous 3060 * write out the pages, but rather only collect contiguous
3102 * logical block extents, call the multi-block allocator, and 3061 * logical block extents, call the multi-block allocator, and
3103 * then update the buffer heads with the block allocations. 3062 * then update the buffer heads with the block allocations.
3104 * 3063 *
3105 * For now, though, we'll cheat by calling filemap_flush(), 3064 * For now, though, we'll cheat by calling filemap_flush(),
3106 * which will map the blocks, and start the I/O, but not 3065 * which will map the blocks, and start the I/O, but not
3107 * actually wait for the I/O to complete. 3066 * actually wait for the I/O to complete.
@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
3237 * 3196 *
3238 */ 3197 */
3239static int __ext4_normal_writepage(struct page *page, 3198static int __ext4_normal_writepage(struct page *page,
3240 struct writeback_control *wbc) 3199 struct writeback_control *wbc)
3241{ 3200{
3242 struct inode *inode = page->mapping->host; 3201 struct inode *inode = page->mapping->host;
3243 3202
@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page,
3249} 3208}
3250 3209
3251static int ext4_normal_writepage(struct page *page, 3210static int ext4_normal_writepage(struct page *page,
3252 struct writeback_control *wbc) 3211 struct writeback_control *wbc)
3253{ 3212{
3254 struct inode *inode = page->mapping->host; 3213 struct inode *inode = page->mapping->host;
3255 loff_t size = i_size_read(inode); 3214 loff_t size = i_size_read(inode);
3256 loff_t len; 3215 loff_t len;
3257 3216
3258 trace_mark(ext4_normal_writepage, 3217 trace_ext4_normal_writepage(inode, page);
3259 "dev %s ino %lu page_index %lu",
3260 inode->i_sb->s_id, inode->i_ino, page->index);
3261 J_ASSERT(PageLocked(page)); 3218 J_ASSERT(PageLocked(page));
3262 if (page->index == size >> PAGE_CACHE_SHIFT) 3219 if (page->index == size >> PAGE_CACHE_SHIFT)
3263 len = size & ~PAGE_CACHE_MASK; 3220 len = size & ~PAGE_CACHE_MASK;
@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page,
3287} 3244}
3288 3245
3289static int __ext4_journalled_writepage(struct page *page, 3246static int __ext4_journalled_writepage(struct page *page,
3290 struct writeback_control *wbc) 3247 struct writeback_control *wbc)
3291{ 3248{
3292 struct address_space *mapping = page->mapping; 3249 struct address_space *mapping = page->mapping;
3293 struct inode *inode = mapping->host; 3250 struct inode *inode = mapping->host;
@@ -3337,15 +3294,13 @@ out:
3337} 3294}
3338 3295
3339static int ext4_journalled_writepage(struct page *page, 3296static int ext4_journalled_writepage(struct page *page,
3340 struct writeback_control *wbc) 3297 struct writeback_control *wbc)
3341{ 3298{
3342 struct inode *inode = page->mapping->host; 3299 struct inode *inode = page->mapping->host;
3343 loff_t size = i_size_read(inode); 3300 loff_t size = i_size_read(inode);
3344 loff_t len; 3301 loff_t len;
3345 3302
3346 trace_mark(ext4_journalled_writepage, 3303 trace_ext4_journalled_writepage(inode, page);
3347 "dev %s ino %lu page_index %lu",
3348 inode->i_sb->s_id, inode->i_ino, page->index);
3349 J_ASSERT(PageLocked(page)); 3304 J_ASSERT(PageLocked(page));
3350 if (page->index == size >> PAGE_CACHE_SHIFT) 3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3351 len = size & ~PAGE_CACHE_MASK; 3306 len = size & ~PAGE_CACHE_MASK;
@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3442 * VFS code falls back into buffered path in that case so we are safe. 3397 * VFS code falls back into buffered path in that case so we are safe.
3443 */ 3398 */
3444static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3399static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3445 const struct iovec *iov, loff_t offset, 3400 const struct iovec *iov, loff_t offset,
3446 unsigned long nr_segs) 3401 unsigned long nr_segs)
3447{ 3402{
3448 struct file *file = iocb->ki_filp; 3403 struct file *file = iocb->ki_filp;
3449 struct inode *inode = file->f_mapping->host; 3404 struct inode *inode = file->f_mapping->host;
@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
3763 * (no partially truncated stuff there). */ 3718 * (no partially truncated stuff there). */
3764 3719
3765static Indirect *ext4_find_shared(struct inode *inode, int depth, 3720static Indirect *ext4_find_shared(struct inode *inode, int depth,
3766 ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) 3721 ext4_lblk_t offsets[4], Indirect chain[4],
3722 __le32 *top)
3767{ 3723{
3768 Indirect *partial, *p; 3724 Indirect *partial, *p;
3769 int k, err; 3725 int k, err;
@@ -3819,8 +3775,10 @@ no_top:
3819 * than `count' because there can be holes in there. 3775 * than `count' because there can be holes in there.
3820 */ 3776 */
3821static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 3777static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3822 struct buffer_head *bh, ext4_fsblk_t block_to_free, 3778 struct buffer_head *bh,
3823 unsigned long count, __le32 *first, __le32 *last) 3779 ext4_fsblk_t block_to_free,
3780 unsigned long count, __le32 *first,
3781 __le32 *last)
3824{ 3782{
3825 __le32 *p; 3783 __le32 *p;
3826 if (try_to_extend_transaction(handle, inode)) { 3784 if (try_to_extend_transaction(handle, inode)) {
@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3837 } 3795 }
3838 3796
3839 /* 3797 /*
3840 * Any buffers which are on the journal will be in memory. We find 3798 * Any buffers which are on the journal will be in memory. We
3841 * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() 3799 * find them on the hash table so jbd2_journal_revoke() will
3842 * on them. We've already detached each block from the file, so 3800 * run jbd2_journal_forget() on them. We've already detached
3843 * bforget() in jbd2_journal_forget() should be safe. 3801 * each block from the file, so bforget() in
3802 * jbd2_journal_forget() should be safe.
3844 * 3803 *
3845 * AKPM: turn on bforget in jbd2_journal_forget()!!! 3804 * AKPM: turn on bforget in jbd2_journal_forget()!!!
3846 */ 3805 */
@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
4212 (__le32*)partial->bh->b_data+addr_per_block, 4171 (__le32*)partial->bh->b_data+addr_per_block,
4213 (chain+n-1) - partial); 4172 (chain+n-1) - partial);
4214 BUFFER_TRACE(partial->bh, "call brelse"); 4173 BUFFER_TRACE(partial->bh, "call brelse");
4215 brelse (partial->bh); 4174 brelse(partial->bh);
4216 partial--; 4175 partial--;
4217 } 4176 }
4218do_indirects: 4177do_indirects:
@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
4453 if (flags & S_DIRSYNC) 4412 if (flags & S_DIRSYNC)
4454 ei->i_flags |= EXT4_DIRSYNC_FL; 4413 ei->i_flags |= EXT4_DIRSYNC_FL;
4455} 4414}
4415
4456static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4416static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
4457 struct ext4_inode_info *ei) 4417 struct ext4_inode_info *ei)
4458{ 4418{
4459 blkcnt_t i_blocks ; 4419 blkcnt_t i_blocks ;
4460 struct inode *inode = &(ei->vfs_inode); 4420 struct inode *inode = &(ei->vfs_inode);
@@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4569 EXT4_GOOD_OLD_INODE_SIZE + 4529 EXT4_GOOD_OLD_INODE_SIZE +
4570 ei->i_extra_isize; 4530 ei->i_extra_isize;
4571 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 4531 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4572 ei->i_state |= EXT4_STATE_XATTR; 4532 ei->i_state |= EXT4_STATE_XATTR;
4573 } 4533 }
4574 } else 4534 } else
4575 ei->i_extra_isize = 0; 4535 ei->i_extra_isize = 0;
@@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4588 4548
4589 ret = 0; 4549 ret = 0;
4590 if (ei->i_file_acl && 4550 if (ei->i_file_acl &&
4591 ((ei->i_file_acl < 4551 ((ei->i_file_acl <
4592 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 4552 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4593 EXT4_SB(sb)->s_gdb_count)) || 4553 EXT4_SB(sb)->s_gdb_count)) ||
4594 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { 4554 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
@@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4603 !ext4_inode_is_fast_symlink(inode))) 4563 !ext4_inode_is_fast_symlink(inode)))
4604 /* Validate extent which is part of inode */ 4564 /* Validate extent which is part of inode */
4605 ret = ext4_ext_check_inode(inode); 4565 ret = ext4_ext_check_inode(inode);
4606 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4566 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4607 (S_ISLNK(inode->i_mode) && 4567 (S_ISLNK(inode->i_mode) &&
4608 !ext4_inode_is_fast_symlink(inode))) { 4568 !ext4_inode_is_fast_symlink(inode))) {
4609 /* Validate block references which are part of inode */ 4569 /* Validate block references which are part of inode */
4610 ret = ext4_check_inode_blockref(inode); 4570 ret = ext4_check_inode_blockref(inode);
4611 } 4571 }
4612 if (ret) { 4572 if (ret) {
4613 brelse(bh); 4573 brelse(bh);
4614 goto bad_inode; 4574 goto bad_inode;
4615 } 4575 }
4616 4576
4617 if (S_ISREG(inode->i_mode)) { 4577 if (S_ISREG(inode->i_mode)) {
@@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4642 } else { 4602 } else {
4643 brelse(bh); 4603 brelse(bh);
4644 ret = -EIO; 4604 ret = -EIO;
4645 ext4_error(inode->i_sb, __func__, 4605 ext4_error(inode->i_sb, __func__,
4646 "bogus i_mode (%o) for inode=%lu", 4606 "bogus i_mode (%o) for inode=%lu",
4647 inode->i_mode, inode->i_ino); 4607 inode->i_mode, inode->i_ino);
4648 goto bad_inode; 4608 goto bad_inode;
@@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle,
4795 cpu_to_le32(new_encode_dev(inode->i_rdev)); 4755 cpu_to_le32(new_encode_dev(inode->i_rdev));
4796 raw_inode->i_block[2] = 0; 4756 raw_inode->i_block[2] = 0;
4797 } 4757 }
4798 } else for (block = 0; block < EXT4_N_BLOCKS; block++) 4758 } else
4799 raw_inode->i_block[block] = ei->i_data[block]; 4759 for (block = 0; block < EXT4_N_BLOCKS; block++)
4760 raw_inode->i_block[block] = ei->i_data[block];
4800 4761
4801 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4762 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4802 if (ei->i_extra_isize) { 4763 if (ei->i_extra_isize) {
@@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
 5150 * Given this, we know that the caller already has write access to iloc->bh. 5111
5151 */ 5112 */
5152int ext4_mark_iloc_dirty(handle_t *handle, 5113int ext4_mark_iloc_dirty(handle_t *handle,
5153 struct inode *inode, struct ext4_iloc *iloc) 5114 struct inode *inode, struct ext4_iloc *iloc)
5154{ 5115{
5155 int err = 0; 5116 int err = 0;
5156 5117
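
The inode.c hunks above are part of a wholesale conversion of ext4's ad-hoc trace_mark() calls into typed tracepoints (trace_ext4_write_begin(), trace_ext4_da_writepages(), and so on), whose definitions live in a shared trace header (the mballoc hunk further down pulls in <trace/events/ext4.h>). As a rough, hedged illustration of what such a definition looks like -- this is not the actual ext4 trace header, and the usual TRACE_SYSTEM/define_trace.h boilerplate around it is omitted -- a TRACE_EVENT for ext4_write_begin could be sketched as:

/* Illustrative sketch only; field choices here are assumptions. */
TRACE_EVENT(ext4_write_begin,
	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int flags),
	TP_ARGS(inode, pos, len, flags),

	TP_STRUCT__entry(
		__field(dev_t,		dev)
		__field(ino_t,		ino)
		__field(loff_t,		pos)
		__field(unsigned int,	len)
		__field(unsigned int,	flags)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->pos	= pos;
		__entry->len	= len;
		__entry->flags	= flags;
	),

	TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  (long long) __entry->pos, __entry->len, __entry->flags)
);

The payoff over trace_mark() is that each event has a fixed, typed layout instead of a printf-style format string scattered through the filesystem code, and it can be enabled individually under the tracing events directory.
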
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 91e75f7a9e73..bb415408fdb6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/file.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4.h" 20#include "ext4.h"
@@ -213,6 +214,41 @@ setversion_out:
213 214
214 return err; 215 return err;
215 } 216 }
217
218 case EXT4_IOC_MOVE_EXT: {
219 struct move_extent me;
220 struct file *donor_filp;
221 int err;
222
223 if (copy_from_user(&me,
224 (struct move_extent __user *)arg, sizeof(me)))
225 return -EFAULT;
226
227 donor_filp = fget(me.donor_fd);
228 if (!donor_filp)
229 return -EBADF;
230
231 if (!capable(CAP_DAC_OVERRIDE)) {
232 if ((current->real_cred->fsuid != inode->i_uid) ||
233 !(inode->i_mode & S_IRUSR) ||
234 !(donor_filp->f_dentry->d_inode->i_mode &
235 S_IRUSR)) {
236 fput(donor_filp);
237 return -EACCES;
238 }
239 }
240
241 err = ext4_move_extents(filp, donor_filp, me.orig_start,
242 me.donor_start, me.len, &me.moved_len);
243 fput(donor_filp);
244
245 if (!err)
246 if (copy_to_user((struct move_extent *)arg,
247 &me, sizeof(me)))
248 return -EFAULT;
249 return err;
250 }
251
216 case EXT4_IOC_GROUP_ADD: { 252 case EXT4_IOC_GROUP_ADD: {
217 struct ext4_new_group_data input; 253 struct ext4_new_group_data input;
218 struct super_block *sb = inode->i_sb; 254 struct super_block *sb = inode->i_sb;
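
The new EXT4_IOC_MOVE_EXT case above is the ioctl side of the online-defragmentation work: userspace opens a donor file, fills in a struct move_extent, and the kernel exchanges the named block ranges via ext4_move_extents(). A hypothetical userspace sketch follows; EXT4_IOC_MOVE_EXT and struct move_extent really come from the kernel's ext4 headers, so the local declarations below (field names taken from the handler above, layout and ioctl number 'f'/15 assumed) are illustrative, not authoritative.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* Assumed layout; take the real declaration from fs/ext4/ext4.h. */
struct move_extent {
	uint32_t reserved;	/* should be zero (assumed) */
	uint32_t donor_fd;	/* fd of the donor file */
	uint64_t orig_start;	/* first logical block of the original file */
	uint64_t donor_start;	/* first logical block of the donor file */
	uint64_t len;		/* number of blocks to exchange */
	uint64_t moved_len;	/* filled in by the kernel */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(int argc, char **argv)
{
	struct move_extent me = { 0 };
	int orig_fd, donor_fd;

	if (argc != 3)
		return 1;
	orig_fd = open(argv[1], O_RDWR);
	donor_fd = open(argv[2], O_RDWR);
	if (orig_fd < 0 || donor_fd < 0)
		return 1;

	me.donor_fd = donor_fd;
	me.orig_start = 0;	/* swap the first 1024 blocks */
	me.donor_start = 0;
	me.len = 1024;

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long) me.moved_len);
	return 0;
}

Note the permission check in the handler: without CAP_DAC_OVERRIDE the caller must own the original file and both files must be owner-readable, otherwise the ioctl fails with -EACCES.
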
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0e..519a0a686d94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
22 */ 22 */
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <trace/events/ext4.h>
26
25/* 27/*
26 * MUSTDO: 28 * MUSTDO:
27 * - test ext4_ext_search_left() and ext4_ext_search_right() 29 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
340 ext4_group_t group); 342 ext4_group_t group);
341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 343static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
342 344
343
344
345static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 345static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
346{ 346{
347#if BITS_PER_LONG == 64 347#if BITS_PER_LONG == 64
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2860 + entry->start_blk 2860 + entry->start_blk
2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2862 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", 2862 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
2863 sb->s_id, (unsigned long long) discard_block, 2863 entry->count);
2864 entry->count);
2865 sb_issue_discard(sb, discard_block, entry->count); 2864 sb_issue_discard(sb, discard_block, entry->count);
2866 2865
2867 kmem_cache_free(ext4_free_ext_cachep, entry); 2866 kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3629 3628
3630 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3629 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3631 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3630 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3632 trace_mark(ext4_mb_new_inode_pa, 3631 trace_ext4_mb_new_inode_pa(ac, pa);
3633 "dev %s ino %lu pstart %llu len %u lstart %u",
3634 sb->s_id, ac->ac_inode->i_ino,
3635 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3636 3632
3637 ext4_mb_use_inode_pa(ac, pa); 3633 ext4_mb_use_inode_pa(ac, pa);
3638 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3634 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3691 pa->pa_type = MB_GROUP_PA; 3687 pa->pa_type = MB_GROUP_PA;
3692 3688
3693 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3689 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3694 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3690 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3695 trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", 3691 trace_ext4_mb_new_group_pa(ac, pa);
3696 sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3697 3692
3698 ext4_mb_use_group_pa(ac, pa); 3693 ext4_mb_use_group_pa(ac, pa);
3699 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3694 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3783 ext4_mb_store_history(ac); 3778 ext4_mb_store_history(ac);
3784 } 3779 }
3785 3780
3786 trace_mark(ext4_mb_release_inode_pa, 3781 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
3787 "dev %s ino %lu block %llu count %u", 3782 next - bit);
3788 sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
3789 next - bit);
3790 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3783 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3791 bit = next + 1; 3784 bit = next + 1;
3792 } 3785 }
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3820 if (ac) 3813 if (ac)
3821 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3814 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3822 3815
3823 trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", 3816 trace_ext4_mb_release_group_pa(ac, pa);
3824 sb->s_id, pa->pa_pstart, pa->pa_len);
3825 BUG_ON(pa->pa_deleted == 0); 3817 BUG_ON(pa->pa_deleted == 0);
3826 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3818 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3827 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3819 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3889 3881
3890 INIT_LIST_HEAD(&list); 3882 INIT_LIST_HEAD(&list);
3891 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3883 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3884 if (ac)
3885 ac->ac_sb = sb;
3892repeat: 3886repeat:
3893 ext4_lock_group(sb, group); 3887 ext4_lock_group(sb, group);
3894 list_for_each_entry_safe(pa, tmp, 3888 list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
3987 } 3981 }
3988 3982
3989 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 3983 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
3990 trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, 3984 trace_ext4_discard_preallocations(inode);
3991 inode->i_ino);
3992 3985
3993 INIT_LIST_HEAD(&list); 3986 INIT_LIST_HEAD(&list);
3994 3987
3995 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3988 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3989 if (ac) {
3990 ac->ac_sb = sb;
3991 ac->ac_inode = inode;
3992 }
3996repeat: 3993repeat:
3997 /* first, collect all pa's in the inode */ 3994 /* first, collect all pa's in the inode */
3998 spin_lock(&ei->i_prealloc_lock); 3995 spin_lock(&ei->i_prealloc_lock);
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4276 4273
4277 INIT_LIST_HEAD(&discard_list); 4274 INIT_LIST_HEAD(&discard_list);
4278 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4275 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4276 if (ac)
4277 ac->ac_sb = sb;
4279 4278
4280 spin_lock(&lg->lg_prealloc_lock); 4279 spin_lock(&lg->lg_prealloc_lock);
4281 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4280 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4445 int ret; 4444 int ret;
4446 int freed = 0; 4445 int freed = 0;
4447 4446
4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4447 trace_ext4_mb_discard_preallocations(sb, needed);
4449 sb->s_id, needed);
4450 for (i = 0; i < ngroups && needed > 0; i++) { 4448 for (i = 0; i < ngroups && needed > 0; i++) {
4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4449 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4452 freed += ret; 4450 freed += ret;
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4475 sb = ar->inode->i_sb; 4473 sb = ar->inode->i_sb;
4476 sbi = EXT4_SB(sb); 4474 sbi = EXT4_SB(sb);
4477 4475
4478 trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " 4476 trace_ext4_request_blocks(ar);
4479 "lblk %llu goal %llu lleft %llu lright %llu "
4480 "pleft %llu pright %llu ",
4481 sb->s_id, ar->flags, ar->len,
4482 ar->inode ? ar->inode->i_ino : 0,
4483 (unsigned long long) ar->logical,
4484 (unsigned long long) ar->goal,
4485 (unsigned long long) ar->lleft,
4486 (unsigned long long) ar->lright,
4487 (unsigned long long) ar->pleft,
4488 (unsigned long long) ar->pright);
4489 4477
4490 /* 4478 /*
4491 * For delayed allocation, we could skip the ENOSPC and 4479 * For delayed allocation, we could skip the ENOSPC and
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4521 } 4509 }
4522 4510
4523 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4524 if (!ac) { 4512 if (ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4525 ar->len = 0; 4516 ar->len = 0;
4526 *errp = -ENOMEM; 4517 *errp = -ENOMEM;
4527 goto out1; 4518 goto out1;
@@ -4594,18 +4585,7 @@ out3:
4594 reserv_blks); 4585 reserv_blks);
4595 } 4586 }
4596 4587
4597 trace_mark(ext4_allocate_blocks, 4588 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4598 "dev %s block %llu flags %u len %u ino %lu "
4599 "logical %llu goal %llu lleft %llu lright %llu "
4600 "pleft %llu pright %llu ",
4601 sb->s_id, (unsigned long long) block,
4602 ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
4603 (unsigned long long) ar->logical,
4604 (unsigned long long) ar->goal,
4605 (unsigned long long) ar->lleft,
4606 (unsigned long long) ar->lright,
4607 (unsigned long long) ar->pleft,
4608 (unsigned long long) ar->pright);
4609 4589
4610 return block; 4590 return block;
4611} 4591}
@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4709 * Main entry point into mballoc to free blocks 4689 * Main entry point into mballoc to free blocks
4710 */ 4690 */
4711void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4691void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4712 unsigned long block, unsigned long count, 4692 ext4_fsblk_t block, unsigned long count,
4713 int metadata, unsigned long *freed) 4693 int metadata, unsigned long *freed)
4714{ 4694{
4715 struct buffer_head *bitmap_bh = NULL; 4695 struct buffer_head *bitmap_bh = NULL;
@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4735 block + count > ext4_blocks_count(es)) { 4715 block + count > ext4_blocks_count(es)) {
4736 ext4_error(sb, __func__, 4716 ext4_error(sb, __func__,
4737 "Freeing blocks not in datazone - " 4717 "Freeing blocks not in datazone - "
4738 "block = %lu, count = %lu", block, count); 4718 "block = %llu, count = %lu", block, count);
4739 goto error_return; 4719 goto error_return;
4740 } 4720 }
4741 4721
4742 ext4_debug("freeing block %lu\n", block); 4722 ext4_debug("freeing block %llu\n", block);
4743 trace_mark(ext4_free_blocks, 4723 trace_ext4_free_blocks(inode, block, count, metadata);
4744 "dev %s block %llu count %lu metadata %d ino %lu",
4745 sb->s_id, (unsigned long long) block, count, metadata,
4746 inode ? inode->i_ino : 0);
4747 4724
4748 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4725 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4749 if (ac) { 4726 if (ac) {
@@ -4784,7 +4761,7 @@ do_more:
4784 4761
4785 ext4_error(sb, __func__, 4762 ext4_error(sb, __func__,
4786 "Freeing blocks in system zone - " 4763 "Freeing blocks in system zone - "
4787 "Block = %lu, count = %lu", block, count); 4764 "Block = %llu, count = %lu", block, count);
4788 /* err = 0. ext4_std_error should be a no op */ 4765 /* err = 0. ext4_std_error should be a no op */
4789 goto error_return; 4766 goto error_return;
4790 } 4767 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215b..c96bb19f58f9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/marker.h>
23#include <linux/mutex.h> 22#include <linux/mutex.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "ext4.h" 24#include "ext4.h"
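
Besides dropping the old <linux/marker.h> include and switching to <trace/events/ext4.h>, the mballoc hunks give each freshly allocated ext4_allocation_context a minimal amount of state right after kmem_cache_alloc(). The likely reason -- an interpretation, not spelled out in the diff -- is that the converted trace_ext4_mb_* calls pull the superblock (and, on the per-inode path, the inode) out of that context, so those fields must be valid even on the discard paths that previously left the context uninitialized. The recurring pattern, mirroring the hunks above:

	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);	/* may return NULL */
	if (ac) {
		ac->ac_sb = sb;		/* used by the tracepoints */
		ac->ac_inode = inode;	/* only where an inode is at hand */
	}
	/* the discard code below continues to tolerate ac == NULL */
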
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index fe64d9f79852..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
458 struct inode *tmp_inode = NULL; 458 struct inode *tmp_inode = NULL;
459 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
460 unsigned long max_entries; 460 unsigned long max_entries;
461 __u32 goal;
461 462
462 /* 463 /*
463 * If the filesystem does not support extents, or the inode 464 * If the filesystem does not support extents, or the inode
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
483 retval = PTR_ERR(handle); 484 retval = PTR_ERR(handle);
484 return retval; 485 return retval;
485 } 486 }
486 tmp_inode = ext4_new_inode(handle, 487 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
487 inode->i_sb->s_root->d_inode, 488 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
488 S_IFREG); 489 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
490 S_IFREG, 0, goal);
489 if (IS_ERR(tmp_inode)) { 491 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 492 retval = -ENOMEM;
491 ext4_journal_stop(handle); 493 ext4_journal_stop(handle);
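
The new goal calculation in ext4_ext_migrate() asks ext4_new_inode() to place the temporary inode in the same block group as the inode being migrated: it rounds i_ino down to the first inode number of that group. A quick worked example with made-up numbers:

	/* Hypothetical values, only to illustrate the arithmetic above. */
	unsigned long inodes_per_group = 8192;
	unsigned long ino = 20000;	/* group 2 holds inodes 16385..24576 */
	__u32 goal = (((ino - 1) / inodes_per_group) * inodes_per_group) + 1;
	/* goal == 16385, the first inode number of group 2 */
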
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
new file mode 100644
index 000000000000..bbf2dd9404dc
--- /dev/null
+++ b/fs/ext4/move_extent.c
@@ -0,0 +1,1320 @@
1/*
2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
3 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
4 * Akira Fujita <a-fujita@rs.jp.nec.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/fs.h>
17#include <linux/quotaops.h>
18#include "ext4_jbd2.h"
19#include "ext4_extents.h"
20#include "ext4.h"
21
22#define get_ext_path(path, inode, block, ret) \
23 do { \
24 path = ext4_ext_find_extent(inode, block, path); \
25 if (IS_ERR(path)) { \
26 ret = PTR_ERR(path); \
27 path = NULL; \
28 } \
29 } while (0)
30
31/**
32 * copy_extent_status - Copy the extent's initialization status
33 *
 34 * @src: an extent whose initialization status is read
 35 * @dest: an extent whose status is set from @src
36 */
37static void
38copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
39{
40 if (ext4_ext_is_uninitialized(src))
41 ext4_ext_mark_uninitialized(dest);
42 else
43 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
44}
45
46/**
47 * mext_next_extent - Search for the next extent and set it to "extent"
48 *
49 * @inode: inode which is searched
50 * @path: this will obtain data for the next extent
51 * @extent: pointer to the next extent we have just gotten
52 *
 53 * Search for the next extent in the array of ext4_ext_path structures (@path)
 54 * and store it in the ext4_extent structure (@extent). In addition, the member
 55 * of @path (->p_ext) also points to the next extent. Return 0 on success, 1 if
56 * ext4_ext_path structure refers to the last extent, or a negative error
57 * value on failure.
58 */
59static int
60mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
61 struct ext4_extent **extent)
62{
63 int ppos, leaf_ppos = path->p_depth;
64
65 ppos = leaf_ppos;
66 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
67 /* leaf block */
68 *extent = ++path[ppos].p_ext;
69 return 0;
70 }
71
72 while (--ppos >= 0) {
73 if (EXT_LAST_INDEX(path[ppos].p_hdr) >
74 path[ppos].p_idx) {
75 int cur_ppos = ppos;
76
77 /* index block */
78 path[ppos].p_idx++;
79 path[ppos].p_block = idx_pblock(path[ppos].p_idx);
80 if (path[ppos+1].p_bh)
81 brelse(path[ppos+1].p_bh);
82 path[ppos+1].p_bh =
83 sb_bread(inode->i_sb, path[ppos].p_block);
84 if (!path[ppos+1].p_bh)
85 return -EIO;
86 path[ppos+1].p_hdr =
87 ext_block_hdr(path[ppos+1].p_bh);
88
89 /* Halfway index block */
90 while (++cur_ppos < leaf_ppos) {
91 path[cur_ppos].p_idx =
92 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
93 path[cur_ppos].p_block =
94 idx_pblock(path[cur_ppos].p_idx);
95 if (path[cur_ppos+1].p_bh)
96 brelse(path[cur_ppos+1].p_bh);
97 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
98 path[cur_ppos].p_block);
99 if (!path[cur_ppos+1].p_bh)
100 return -EIO;
101 path[cur_ppos+1].p_hdr =
102 ext_block_hdr(path[cur_ppos+1].p_bh);
103 }
104
105 /* leaf block */
106 path[leaf_ppos].p_ext = *extent =
107 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
108 return 0;
109 }
110 }
111 /* We found the last extent */
112 return 1;
113}
114
115/**
116 * mext_double_down_read - Acquire two inodes' read semaphore
117 *
118 * @orig_inode: original inode structure
119 * @donor_inode: donor inode structure
120 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
121 */
122static void
123mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
124{
125 struct inode *first = orig_inode, *second = donor_inode;
126
127 BUG_ON(orig_inode == NULL || donor_inode == NULL);
128
129 /*
130 * Use the inode number to provide the stable locking order instead
131 * of its address, because the C language doesn't guarantee you can
132 * compare pointers that don't come from the same array.
133 */
134 if (donor_inode->i_ino < orig_inode->i_ino) {
135 first = donor_inode;
136 second = orig_inode;
137 }
138
139 down_read(&EXT4_I(first)->i_data_sem);
140 down_read(&EXT4_I(second)->i_data_sem);
141}
142
143/**
144 * mext_double_down_write - Acquire two inodes' write semaphore
145 *
146 * @orig_inode: original inode structure
147 * @donor_inode: donor inode structure
148 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
149 */
150static void
151mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
152{
153 struct inode *first = orig_inode, *second = donor_inode;
154
155 BUG_ON(orig_inode == NULL || donor_inode == NULL);
156
157 /*
158 * Use the inode number to provide the stable locking order instead
159 * of its address, because the C language doesn't guarantee you can
160 * compare pointers that don't come from the same array.
161 */
162 if (donor_inode->i_ino < orig_inode->i_ino) {
163 first = donor_inode;
164 second = orig_inode;
165 }
166
167 down_write(&EXT4_I(first)->i_data_sem);
168 down_write(&EXT4_I(second)->i_data_sem);
169}
170
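
Ordering the two i_data_sem acquisitions by inode number (rather than, say, always locking the original inode first) is what keeps concurrent moves deadlock-free:

	/*
	 * Task A: move 10 <-> 42		Task B: move 42 <-> 10
	 *   lock i_data_sem of inode 10	  lock i_data_sem of inode 10  <- blocks
	 *   lock i_data_sem of inode 42	  lock i_data_sem of inode 42
	 * Both sides order by i_ino, so neither can hold inode 42's lock
	 * while waiting on inode 10's -- the classic ABBA deadlock cannot arise.
	 */
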
171/**
172 * mext_double_up_read - Release two inodes' read semaphore
173 *
 174 * @orig_inode: original inode structure whose lock is released first
 175 * @donor_inode: donor inode structure whose lock is released second
176 * Release read semaphore of two inodes (orig and donor).
177 */
178static void
179mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
180{
181 BUG_ON(orig_inode == NULL || donor_inode == NULL);
182
183 up_read(&EXT4_I(orig_inode)->i_data_sem);
184 up_read(&EXT4_I(donor_inode)->i_data_sem);
185}
186
187/**
188 * mext_double_up_write - Release two inodes' write semaphore
189 *
 190 * @orig_inode: original inode structure whose lock is released first
 191 * @donor_inode: donor inode structure whose lock is released second
192 * Release write semaphore of two inodes (orig and donor).
193 */
194static void
195mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
196{
197 BUG_ON(orig_inode == NULL || donor_inode == NULL);
198
199 up_write(&EXT4_I(orig_inode)->i_data_sem);
200 up_write(&EXT4_I(donor_inode)->i_data_sem);
201}
202
203/**
204 * mext_insert_across_blocks - Insert extents across leaf block
205 *
206 * @handle: journal handle
207 * @orig_inode: original inode
208 * @o_start: first original extent to be changed
209 * @o_end: last original extent to be changed
210 * @start_ext: first new extent to be inserted
211 * @new_ext: middle of new extent to be inserted
212 * @end_ext: last new extent to be inserted
213 *
214 * Allocate a new leaf block and insert extents into it. Return 0 on success,
215 * or a negative error value on failure.
216 */
217static int
218mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
219 struct ext4_extent *o_start, struct ext4_extent *o_end,
220 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
221 struct ext4_extent *end_ext)
222{
223 struct ext4_ext_path *orig_path = NULL;
224 ext4_lblk_t eblock = 0;
225 int new_flag = 0;
226 int end_flag = 0;
227 int err = 0;
228
229 if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
230 if (o_start == o_end) {
231
232 /* start_ext new_ext end_ext
233 * donor |---------|-----------|--------|
234 * orig |------------------------------|
235 */
236 end_flag = 1;
237 } else {
238
239 /* start_ext new_ext end_ext
240 * donor |---------|----------|---------|
241 * orig |---------------|--------------|
242 */
243 o_end->ee_block = end_ext->ee_block;
244 o_end->ee_len = end_ext->ee_len;
245 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
246 }
247
248 o_start->ee_len = start_ext->ee_len;
249 new_flag = 1;
250
251 } else if (start_ext->ee_len && new_ext->ee_len &&
252 !end_ext->ee_len && o_start == o_end) {
253
254 /* start_ext new_ext
255 * donor |--------------|---------------|
256 * orig |------------------------------|
257 */
258 o_start->ee_len = start_ext->ee_len;
259 new_flag = 1;
260
261 } else if (!start_ext->ee_len && new_ext->ee_len &&
262 end_ext->ee_len && o_start == o_end) {
263
264 /* new_ext end_ext
265 * donor |--------------|---------------|
266 * orig |------------------------------|
267 */
268 o_end->ee_block = end_ext->ee_block;
269 o_end->ee_len = end_ext->ee_len;
270 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
271
272 /*
 273 * Leave eblock at zero if new_ext starts at
 274 * block 0 (the first block of the file).
275 */
276 if (new_ext->ee_block)
277 eblock = le32_to_cpu(new_ext->ee_block);
278
279 new_flag = 1;
280 } else {
281 ext4_debug("ext4 move extent: Unexpected insert case\n");
282 return -EIO;
283 }
284
285 if (new_flag) {
286 get_ext_path(orig_path, orig_inode, eblock, err);
287 if (orig_path == NULL)
288 goto out;
289
290 if (ext4_ext_insert_extent(handle, orig_inode,
291 orig_path, new_ext))
292 goto out;
293 }
294
295 if (end_flag) {
296 get_ext_path(orig_path, orig_inode,
297 le32_to_cpu(end_ext->ee_block) - 1, err);
298 if (orig_path == NULL)
299 goto out;
300
301 if (ext4_ext_insert_extent(handle, orig_inode,
302 orig_path, end_ext))
303 goto out;
304 }
305out:
306 if (orig_path) {
307 ext4_ext_drop_refs(orig_path);
308 kfree(orig_path);
309 }
310
311 return err;
312
313}
314
315/**
316 * mext_insert_inside_block - Insert new extent to the extent block
317 *
318 * @o_start: first original extent to be moved
319 * @o_end: last original extent to be moved
320 * @start_ext: first new extent to be inserted
321 * @new_ext: middle of new extent to be inserted
322 * @end_ext: last new extent to be inserted
323 * @eh: extent header of target leaf block
324 * @range_to_move: used to decide how to insert extent
325 *
326 * Insert extents into the leaf block. The extent (@o_start) is overwritten
327 * by inserted extents.
328 */
329static void
330mext_insert_inside_block(struct ext4_extent *o_start,
331 struct ext4_extent *o_end,
332 struct ext4_extent *start_ext,
333 struct ext4_extent *new_ext,
334 struct ext4_extent *end_ext,
335 struct ext4_extent_header *eh,
336 int range_to_move)
337{
338 int i = 0;
339 unsigned long len;
340
341 /* Move the existing extents */
342 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
343 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
344 (unsigned long)(o_end + 1);
345 memmove(o_end + 1 + range_to_move, o_end + 1, len);
346 }
347
348 /* Insert start entry */
349 if (start_ext->ee_len)
350 o_start[i++].ee_len = start_ext->ee_len;
351
352 /* Insert new entry */
353 if (new_ext->ee_len) {
354 o_start[i] = *new_ext;
355 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
356 }
357
358 /* Insert end entry */
359 if (end_ext->ee_len)
360 o_start[i] = *end_ext;
361
362 /* Increment the total entries counter on the extent block */
363 le16_add_cpu(&eh->eh_entries, range_to_move);
364}
365
366/**
367 * mext_insert_extents - Insert new extent
368 *
369 * @handle: journal handle
370 * @orig_inode: original inode
371 * @orig_path: path indicates first extent to be changed
372 * @o_start: first original extent to be changed
373 * @o_end: last original extent to be changed
374 * @start_ext: first new extent to be inserted
375 * @new_ext: middle of new extent to be inserted
376 * @end_ext: last new extent to be inserted
377 *
378 * Call the function to insert extents. If we cannot add more extents into
379 * the leaf block, we call mext_insert_across_blocks() to create a
380 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
381 * on success, or a negative error value on failure.
382 */
383static int
384mext_insert_extents(handle_t *handle, struct inode *orig_inode,
385 struct ext4_ext_path *orig_path,
386 struct ext4_extent *o_start,
387 struct ext4_extent *o_end,
388 struct ext4_extent *start_ext,
389 struct ext4_extent *new_ext,
390 struct ext4_extent *end_ext)
391{
392 struct ext4_extent_header *eh;
393 unsigned long need_slots, slots_range;
394 int range_to_move, depth, ret;
395
396 /*
 397 * The extents that need to be inserted are:
398 * start_extent + new_extent + end_extent.
399 */
400 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
401 (new_ext->ee_len ? 1 : 0);
402
403 /* The number of slots between start and end */
404 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
405 / sizeof(struct ext4_extent);
406
407 /* Range to move the end of extent */
408 range_to_move = need_slots - slots_range;
409 depth = orig_path->p_depth;
410 orig_path += depth;
411 eh = orig_path->p_hdr;
412
413 if (depth) {
414 /* Register to journal */
415 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
416 if (ret)
417 return ret;
418 }
419
420 /* Expansion */
421 if (range_to_move > 0 &&
422 (range_to_move > le16_to_cpu(eh->eh_max)
423 - le16_to_cpu(eh->eh_entries))) {
424
425 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
426 o_end, start_ext, new_ext, end_ext);
427 if (ret < 0)
428 return ret;
429 } else
430 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
431 end_ext, eh, range_to_move);
432
433 if (depth) {
434 ret = ext4_handle_dirty_metadata(handle, orig_inode,
435 orig_path->p_bh);
436 if (ret)
437 return ret;
438 } else {
439 ret = ext4_mark_inode_dirty(handle, orig_inode);
440 if (ret < 0)
441 return ret;
442 }
443
444 return 0;
445}
446
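
The slot arithmetic at the top of mext_insert_extents() may be easier to follow with concrete numbers: need_slots counts how many of the start/new/end extents are non-empty, slots_range counts the extent slots currently occupied from o_start to o_end inclusive, and range_to_move is the extra room required in the leaf. A worked example with made-up values:

	/*
	 * o_start == o_end (one occupied slot), and all three of start_ext,
	 * new_ext and end_ext are non-empty:
	 *   need_slots    = 3
	 *   slots_range   = 1
	 *   range_to_move = 2
	 * so everything after o_end shifts right by two slots and eh_entries
	 * grows by two -- unless fewer than two free slots remain in the leaf
	 * (eh_max - eh_entries), in which case mext_insert_across_blocks()
	 * allocates a new leaf block instead.
	 */
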
447/**
448 * mext_leaf_block - Move one leaf extent block into the inode.
449 *
450 * @handle: journal handle
451 * @orig_inode: original inode
452 * @orig_path: path indicates first extent to be changed
453 * @dext: donor extent
454 * @from: start offset on the target file
455 *
456 * In order to insert extents into the leaf block, we must divide the extent
 457 * in the leaf block into three extents. One covers the range where the new
 458 * extents are inserted, and the other two lie on either side of it.
459 *
460 * Therefore, this function creates structures to save extents of the leaf
461 * block, and inserts extents by calling mext_insert_extents() with
462 * created extents. Return 0 on success, or a negative error value on failure.
463 */
464static int
465mext_leaf_block(handle_t *handle, struct inode *orig_inode,
466 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
467 ext4_lblk_t *from)
468{
469 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
470 struct ext4_extent new_ext, start_ext, end_ext;
471 ext4_lblk_t new_ext_end;
472 ext4_fsblk_t new_phys_end;
473 int oext_alen, new_ext_alen, end_ext_alen;
474 int depth = ext_depth(orig_inode);
475 int ret;
476
477 o_start = o_end = oext = orig_path[depth].p_ext;
478 oext_alen = ext4_ext_get_actual_len(oext);
479 start_ext.ee_len = end_ext.ee_len = 0;
480
481 new_ext.ee_block = cpu_to_le32(*from);
482 ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
483 new_ext.ee_len = dext->ee_len;
484 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
485 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
486 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
487
488 /*
489 * Case: original extent is first
490 * oext |--------|
491 * new_ext |--|
492 * start_ext |--|
493 */
494 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
495 le32_to_cpu(new_ext.ee_block) <
496 le32_to_cpu(oext->ee_block) + oext_alen) {
497 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
498 le32_to_cpu(oext->ee_block));
499 copy_extent_status(oext, &start_ext);
500 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
501 prev_ext = oext - 1;
502 /*
503 * We can merge new_ext into previous extent,
504 * if these are contiguous and same extent type.
505 */
506 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
507 &new_ext)) {
508 o_start = prev_ext;
509 start_ext.ee_len = cpu_to_le16(
510 ext4_ext_get_actual_len(prev_ext) +
511 new_ext_alen);
512 copy_extent_status(prev_ext, &start_ext);
513 new_ext.ee_len = 0;
514 }
515 }
516
517 /*
518 * Case: new_ext_end must be less than oext
519 * oext |-----------|
520 * new_ext |-------|
521 */
522 BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
523
524 /*
525 * Case: new_ext is smaller than original extent
526 * oext |---------------|
527 * new_ext |-----------|
528 * end_ext |---|
529 */
530 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
531 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
532 end_ext.ee_len =
533 cpu_to_le16(le32_to_cpu(oext->ee_block) +
534 oext_alen - 1 - new_ext_end);
535 copy_extent_status(oext, &end_ext);
536 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
537 ext4_ext_store_pblock(&end_ext,
538 (ext_pblock(o_end) + oext_alen - end_ext_alen));
539 end_ext.ee_block =
540 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
541 oext_alen - end_ext_alen);
542 }
543
544 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
545 o_end, &start_ext, &new_ext, &end_ext);
546 return ret;
547}
548
549/**
550 * mext_calc_swap_extents - Calculate extents for extent swapping.
551 *
552 * @tmp_dext: the extent that will belong to the original inode
553 * @tmp_oext: the extent that will belong to the donor inode
554 * @orig_off: block offset of original inode
555 * @donor_off: block offset of donor inode
 556 * @max_count: the maximum length of extents
557 */
558static void
559mext_calc_swap_extents(struct ext4_extent *tmp_dext,
560 struct ext4_extent *tmp_oext,
561 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
562 ext4_lblk_t max_count)
563{
564 ext4_lblk_t diff, orig_diff;
565 struct ext4_extent dext_old, oext_old;
566
567 dext_old = *tmp_dext;
568 oext_old = *tmp_oext;
569
570 /* When tmp_dext is too large, pick up the target range. */
571 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
572
573 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
574 tmp_dext->ee_block =
575 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
576 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
577
578 if (max_count < ext4_ext_get_actual_len(tmp_dext))
579 tmp_dext->ee_len = cpu_to_le16(max_count);
580
581 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
582 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
583
584 /* Adjust extent length if donor extent is larger than orig */
585 if (ext4_ext_get_actual_len(tmp_dext) >
586 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
587 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
588 orig_diff);
589
590 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
591
592 copy_extent_status(&oext_old, tmp_dext);
593 copy_extent_status(&dext_old, tmp_oext);
594}
595
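
A worked example of the trimming done by mext_calc_swap_extents(), with made-up numbers:

	/*
	 * Donor extent: ee_block = 100, len = 100; donor_off = 120
	 *   -> diff = 20, so tmp_dext is shifted to block 120 with len 80
	 *      (its physical start moves forward by 20 blocks as well).
	 * max_count = 50 trims tmp_dext further, to len 50.
	 * Orig extent: ee_block = 300, len = 40; orig_off = 310
	 *   -> orig_diff = 10, only 30 blocks remain in the original extent,
	 *      so tmp_dext is trimmed again to len 30 and tmp_oext->ee_len is
	 *      set to the same 30 blocks; finally the two extents exchange
	 *      their initialization status.
	 */
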
596/**
597 * mext_replace_branches - Replace original extents with new extents
598 *
599 * @handle: journal handle
600 * @orig_inode: original inode
601 * @donor_inode: donor inode
602 * @from: block offset of orig_inode
603 * @count: block count to be replaced
604 *
605 * Replace original inode extents and donor inode extents page by page.
606 * We implement this replacement in the following three steps:
607 * 1. Save the block information of original and donor inodes into
608 * dummy extents.
609 * 2. Change the block information of original inode to point at the
610 * donor inode blocks.
611 * 3. Change the block information of donor inode to point at the saved
612 * original inode blocks in the dummy extents.
613 *
614 * Return 0 on success, or a negative error value on failure.
615 */
616static int
617mext_replace_branches(handle_t *handle, struct inode *orig_inode,
618 struct inode *donor_inode, ext4_lblk_t from,
619 ext4_lblk_t count)
620{
621 struct ext4_ext_path *orig_path = NULL;
622 struct ext4_ext_path *donor_path = NULL;
623 struct ext4_extent *oext, *dext;
624 struct ext4_extent tmp_dext, tmp_oext;
625 ext4_lblk_t orig_off = from, donor_off = from;
626 int err = 0;
627 int depth;
628 int replaced_count = 0;
629 int dext_alen;
630
631 mext_double_down_write(orig_inode, donor_inode);
632
633 /* Get the original extent for the block "orig_off" */
634 get_ext_path(orig_path, orig_inode, orig_off, err);
635 if (orig_path == NULL)
636 goto out;
637
638 /* Get the donor extent for the head */
639 get_ext_path(donor_path, donor_inode, donor_off, err);
640 if (donor_path == NULL)
641 goto out;
642 depth = ext_depth(orig_inode);
643 oext = orig_path[depth].p_ext;
644 tmp_oext = *oext;
645
646 depth = ext_depth(donor_inode);
647 dext = donor_path[depth].p_ext;
648 tmp_dext = *dext;
649
650 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
651 donor_off, count);
652
653 /* Loop for the donor extents */
654 while (1) {
655 /* The extent for donor must be found. */
656 BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
657
658 /* Set donor extent to orig extent */
659 err = mext_leaf_block(handle, orig_inode,
660 orig_path, &tmp_dext, &orig_off);
661 if (err < 0)
662 goto out;
663
664 /* Set orig extent to donor extent */
665 err = mext_leaf_block(handle, donor_inode,
666 donor_path, &tmp_oext, &donor_off);
667 if (err < 0)
668 goto out;
669
670 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
671 replaced_count += dext_alen;
672 donor_off += dext_alen;
673 orig_off += dext_alen;
674
675 /* Already moved the expected blocks */
676 if (replaced_count >= count)
677 break;
678
679 if (orig_path)
680 ext4_ext_drop_refs(orig_path);
681 get_ext_path(orig_path, orig_inode, orig_off, err);
682 if (orig_path == NULL)
683 goto out;
684 depth = ext_depth(orig_inode);
685 oext = orig_path[depth].p_ext;
686 if (le32_to_cpu(oext->ee_block) +
687 ext4_ext_get_actual_len(oext) <= orig_off) {
688 err = 0;
689 goto out;
690 }
691 tmp_oext = *oext;
692
693 if (donor_path)
694 ext4_ext_drop_refs(donor_path);
695 get_ext_path(donor_path, donor_inode,
696 donor_off, err);
697 if (donor_path == NULL)
698 goto out;
699 depth = ext_depth(donor_inode);
700 dext = donor_path[depth].p_ext;
701 if (le32_to_cpu(dext->ee_block) +
702 ext4_ext_get_actual_len(dext) <= donor_off) {
703 err = 0;
704 goto out;
705 }
706 tmp_dext = *dext;
707
708 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
709 donor_off,
710 count - replaced_count);
711 }
712
713out:
714 if (orig_path) {
715 ext4_ext_drop_refs(orig_path);
716 kfree(orig_path);
717 }
718 if (donor_path) {
719 ext4_ext_drop_refs(donor_path);
720 kfree(donor_path);
721 }
722
723 mext_double_up_write(orig_inode, donor_inode);
724 return err;
725}
726
727/**
728 * move_extent_per_page - Move extent data per page
729 *
730 * @o_filp: file structure of original file
731 * @donor_inode: donor inode
732 * @orig_page_offset: page index on original file
733 * @data_offset_in_page: block index where data swapping starts
734 * @block_len_in_page: the number of blocks to be swapped
735 * @uninit: orig extent is uninitialized or not
736 *
737 * Save the data in original inode blocks and replace original inode extents
738 * with donor inode extents by calling mext_replace_branches().
739 * Finally, write the saved data out to the original inode's new blocks. Return 0
740 * on success, or a negative error value on failure.
741 */
742static int
743move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
744 pgoff_t orig_page_offset, int data_offset_in_page,
745 int block_len_in_page, int uninit)
746{
747 struct inode *orig_inode = o_filp->f_dentry->d_inode;
748 struct address_space *mapping = orig_inode->i_mapping;
749 struct buffer_head *bh;
750 struct page *page = NULL;
751 const struct address_space_operations *a_ops = mapping->a_ops;
752 handle_t *handle;
753 ext4_lblk_t orig_blk_offset;
754 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
755 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
756 unsigned int w_flags = 0;
757 unsigned int tmp_data_len, data_len;
758 void *fsdata;
759 int ret, i, jblocks;
760 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
761
762 /*
763 * We need twice the usual number of journal credits because the
764 * original and donor inodes may each modify different metadata blocks.
765 */
766 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
767 handle = ext4_journal_start(orig_inode, jblocks);
768 if (IS_ERR(handle)) {
769 ret = PTR_ERR(handle);
770 return ret;
771 }
772
773 if (segment_eq(get_fs(), KERNEL_DS))
774 w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
775
776 orig_blk_offset = orig_page_offset * blocks_per_page +
777 data_offset_in_page;
778
779 /*
780 * If the orig extent is uninitialized,
781 * there is no need to force the page into memory
782 * and then force it to be written out again.
783 * Just swap the data blocks between orig and donor.
784 */
785 if (uninit) {
786 ret = mext_replace_branches(handle, orig_inode,
787 donor_inode, orig_blk_offset,
788 block_len_in_page);
789
790 /* Clear the inode cache not to refer to the old data */
791 ext4_ext_invalidate_cache(orig_inode);
792 ext4_ext_invalidate_cache(donor_inode);
793 goto out2;
794 }
795
796 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
797
798 /* Calculate data_len */
799 if ((orig_blk_offset + block_len_in_page - 1) ==
800 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
801 /* Replace the last block */
802 tmp_data_len = orig_inode->i_size & (blocksize - 1);
803 /*
804 * If tmp_data_len is zero, the file size is a multiple of the
805 * blocksize, so the last block is full; use the whole blocksize.
806 */
807 if (tmp_data_len == 0)
808 tmp_data_len = blocksize;
809
810 data_len = tmp_data_len +
811 ((block_len_in_page - 1) << orig_inode->i_blkbits);
812 } else {
813 data_len = block_len_in_page << orig_inode->i_blkbits;
814 }
815
816 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
817 &page, &fsdata);
818 if (unlikely(ret < 0))
819 goto out;
820
821 if (!PageUptodate(page)) {
822 mapping->a_ops->readpage(o_filp, page);
823 lock_page(page);
824 }
825
826 /*
827 * try_to_release_page() doesn't call releasepage in writeback mode.
828 * We also have to preserve the order of writes to the same file
829 * when multiple move extent processes run concurrently, so call
830 * wait_on_page_writeback() to wait until any writeback of the
831 * page has finished.
832 */
833 if (PageWriteback(page))
834 wait_on_page_writeback(page);
835
836 /* Release old bh and drop refs */
837 try_to_release_page(page, 0);
838
839 ret = mext_replace_branches(handle, orig_inode, donor_inode,
840 orig_blk_offset, block_len_in_page);
841 if (ret < 0)
842 goto out;
843
844 /* Clear the inode cache not to refer to the old data */
845 ext4_ext_invalidate_cache(orig_inode);
846 ext4_ext_invalidate_cache(donor_inode);
847
848 if (!page_has_buffers(page))
849 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
850
851 bh = page_buffers(page);
852 for (i = 0; i < data_offset_in_page; i++)
853 bh = bh->b_this_page;
854
855 for (i = 0; i < block_len_in_page; i++) {
856 ret = ext4_get_block(orig_inode,
857 (sector_t)(orig_blk_offset + i), bh, 0);
858 if (ret < 0)
859 goto out;
860
861 if (bh->b_this_page != NULL)
862 bh = bh->b_this_page;
863 }
864
865 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
866 page, fsdata);
867 page = NULL;
868
869out:
870 if (unlikely(page)) {
871 if (PageLocked(page))
872 unlock_page(page);
873 page_cache_release(page);
874 }
875out2:
876 ext4_journal_stop(handle);
877
878 return ret < 0 ? ret : 0;
879}
880
881/**
882 * mext_check_arguments - Check whether move extent can be done
883 *
884 * @orig_inode: original inode
885 * @donor_inode: donor inode
886 * @orig_start: logical start offset in block for orig
887 * @donor_start: logical start offset in block for donor
888 * @len: the number of blocks to be moved
889 * @moved_len: moved block length
890 *
891 * Check the arguments of ext4_move_extents() to determine whether the
892 * files can be exchanged with each other.
893 * Return 0 on success, or a negative error value on failure.
894 */
895static int
896mext_check_arguments(struct inode *orig_inode,
897 struct inode *donor_inode, __u64 orig_start,
898 __u64 donor_start, __u64 *len, __u64 moved_len)
899{
900 /* Regular file check */
901 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
902 ext4_debug("ext4 move extent: The argument files should be "
903 "regular file [ino:orig %lu, donor %lu]\n",
904 orig_inode->i_ino, donor_inode->i_ino);
905 return -EINVAL;
906 }
907
908 /* Ext4 move extent does not support swapfile */
909 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
910 ext4_debug("ext4 move extent: The argument files should "
911 "not be swapfile [ino:orig %lu, donor %lu]\n",
912 orig_inode->i_ino, donor_inode->i_ino);
913 return -EINVAL;
914 }
915
916 /* Files should be in the same ext4 FS */
917 if (orig_inode->i_sb != donor_inode->i_sb) {
918 ext4_debug("ext4 move extent: The argument files "
919 "should be in same FS [ino:orig %lu, donor %lu]\n",
920 orig_inode->i_ino, donor_inode->i_ino);
921 return -EINVAL;
922 }
923
924 /* orig and donor should be different files */
925 if (orig_inode->i_ino == donor_inode->i_ino) {
926 ext4_debug("ext4 move extent: The argument files should not "
927 "be same file [ino:orig %lu, donor %lu]\n",
928 orig_inode->i_ino, donor_inode->i_ino);
929 return -EINVAL;
930 }
931
932 /* Ext4 move extent supports only extent-based files */
933 if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
934 ext4_debug("ext4 move extent: orig file is not extents "
935 "based file [ino:orig %lu]\n", orig_inode->i_ino);
936 return -EOPNOTSUPP;
937 } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
938 ext4_debug("ext4 move extent: donor file is not extents "
939 "based file [ino:donor %lu]\n", donor_inode->i_ino);
940 return -EOPNOTSUPP;
941 }
942
943 if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
944 ext4_debug("ext4 move extent: File size is 0 byte\n");
945 return -EINVAL;
946 }
947
948 /* Start offsets should be the same */
949 if (orig_start != donor_start) {
950 ext4_debug("ext4 move extent: orig and donor's start "
951 "offset are not same [ino:orig %lu, donor %lu]\n",
952 orig_inode->i_ino, donor_inode->i_ino);
953 return -EINVAL;
954 }
955
956 if (moved_len) {
957 ext4_debug("ext4 move extent: moved_len should be 0 "
958 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
959 donor_inode->i_ino);
960 return -EINVAL;
961 }
962
963 if ((orig_start > MAX_DEFRAG_SIZE) ||
964 (donor_start > MAX_DEFRAG_SIZE) ||
965 (*len > MAX_DEFRAG_SIZE) ||
966 (orig_start + *len > MAX_DEFRAG_SIZE)) {
967 ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
968 "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
969 orig_inode->i_ino, donor_inode->i_ino);
970 return -EINVAL;
971 }
972
973 if (orig_inode->i_size > donor_inode->i_size) {
974 if (orig_start >= donor_inode->i_size) {
975 ext4_debug("ext4 move extent: orig start offset "
976 "[%llu] should be less than donor file size "
977 "[%lld] [ino:orig %lu, donor_inode %lu]\n",
978 orig_start, donor_inode->i_size,
979 orig_inode->i_ino, donor_inode->i_ino);
980 return -EINVAL;
981 }
982
983 if (orig_start + *len > donor_inode->i_size) {
984 ext4_debug("ext4 move extent: End offset [%llu] should "
985 "be less than donor file size [%lld]."
986 "So adjust length from %llu to %lld "
987 "[ino:orig %lu, donor %lu]\n",
988 orig_start + *len, donor_inode->i_size,
989 *len, donor_inode->i_size - orig_start,
990 orig_inode->i_ino, donor_inode->i_ino);
991 *len = donor_inode->i_size - orig_start;
992 }
993 } else {
994 if (orig_start >= orig_inode->i_size) {
995 ext4_debug("ext4 move extent: start offset [%llu] "
996 "should be less than original file size "
997 "[%lld] [inode:orig %lu, donor %lu]\n",
998 orig_start, orig_inode->i_size,
999 orig_inode->i_ino, donor_inode->i_ino);
1000 return -EINVAL;
1001 }
1002
1003 if (orig_start + *len > orig_inode->i_size) {
1004 ext4_debug("ext4 move extent: Adjust length "
1005 "from %llu to %lld. Because it should be "
1006 "less than original file size "
1007 "[ino:orig %lu, donor %lu]\n",
1008 *len, orig_inode->i_size - orig_start,
1009 orig_inode->i_ino, donor_inode->i_ino);
1010 *len = orig_inode->i_size - orig_start;
1011 }
1012 }
1013
1014 if (!*len) {
1015 ext4_debug("ext4 move extent: len should not be 0 "
1016 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1017 donor_inode->i_ino);
1018 return -EINVAL;
1019 }
1020
1021 return 0;
1022}
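
The checks above translate directly into conditions a caller can verify before issuing the ioctl. A hypothetical user-space preflight (illustrative only, not part of this patch) covering the regular-file, same-filesystem, distinct-inode and non-empty checks might look like the sketch below; the extents-flag, swapfile and offset checks are left to the kernel.

#include <stdbool.h>
#include <sys/stat.h>

/* Rough user-space mirror of mext_check_arguments(): both descriptors must
 * refer to regular, non-empty files on the same filesystem, and must not be
 * the same file. */
static bool move_extent_preflight(int orig_fd, int donor_fd)
{
	struct stat o, d;

	if (fstat(orig_fd, &o) < 0 || fstat(donor_fd, &d) < 0)
		return false;

	return S_ISREG(o.st_mode) && S_ISREG(d.st_mode) &&
	       o.st_dev == d.st_dev && o.st_ino != d.st_ino &&
	       o.st_size > 0 && d.st_size > 0;
}
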
1023
1024/**
1025 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1026 *
1027 * @inode1: the inode structure
1028 * @inode2: the inode structure
1029 *
1030 * Lock two inodes' i_mutex by i_ino order. This function is moved from
1031 * fs/inode.c.
1032 */
1033static void
1034mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1035{
1036 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1037 if (inode1)
1038 mutex_lock(&inode1->i_mutex);
1039 else if (inode2)
1040 mutex_lock(&inode2->i_mutex);
1041 return;
1042 }
1043
1044 if (inode1->i_ino < inode2->i_ino) {
1045 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1046 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1047 } else {
1048 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1049 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1050 }
1051}
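
Ordering the two i_mutex acquisitions by i_ino is what rules out an ABBA deadlock here: as long as every path takes the pair in the same global order, two tasks locking the same two inodes can never each hold one mutex while waiting for the other. A minimal user-space sketch of the same idiom, using pthread mutexes ordered by an id (purely illustrative, not part of this patch):

#include <pthread.h>

struct obj {
	unsigned long id;		/* stands in for inode->i_ino */
	pthread_mutex_t lock;		/* stands in for inode->i_mutex */
};

/* Lock the lower id first so every caller agrees on the acquisition order. */
static void double_lock(struct obj *a, struct obj *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
		return;
	}
	if (a->id > b->id) {
		struct obj *tmp = a;

		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}

static void double_unlock(struct obj *a, struct obj *b)
{
	pthread_mutex_unlock(&a->lock);
	if (b != a)
		pthread_mutex_unlock(&b->lock);
}
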
1052
1053/**
1054 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1055 *
1056 * @inode1: the inode that is released first
1057 * @inode2: the inode that is released second
1058 *
1059 * This function is moved from fs/inode.c.
1060 */
1061
1062static void
1063mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1064{
1065 if (inode1)
1066 mutex_unlock(&inode1->i_mutex);
1067
1068 if (inode2 && inode2 != inode1)
1069 mutex_unlock(&inode2->i_mutex);
1070}
1071
1072/**
1073 * ext4_move_extents - Exchange the specified range of a file
1074 *
1075 * @o_filp: file structure of the original file
1076 * @d_filp: file structure of the donor file
1077 * @orig_start: start offset in block for orig
1078 * @donor_start: start offset in block for donor
1079 * @len: the number of blocks to be moved
1080 * @moved_len: moved block length
1081 *
1082 * This function returns 0 and sets the moved block count in moved_len
1083 * on success, otherwise it returns a negative error value.
1084 *
1085 * Note: ext4_move_extents() proceeds in the following order.
1086 * 1:ext4_move_extents() calculates the last block number of the extents
1087 *   to be moved from the start block number (orig_start) and the number
1088 *   of blocks to be moved (len) given as arguments.
1089 *   If {orig, donor}_start points into a hole, the extent start offset
1090 *   referenced by ext_cur (the current extent), holecheck_path and
1091 *   orig_path is set to the first extent behind the hole.
1092 * 2:Repeat step 3 to step 5 until holecheck_path points to the last extent
1093 *   or ext_cur exceeds block_end, the last logical block number.
1094 * 3:To get the length of the contiguous area, call mext_next_extent()
1095 *   repeatedly on ext_cur (whose initial value comes from holecheck_path)
1096 *   until a non-contiguous extent is found, the start logical block number
1097 *   exceeds block_end, or the extent is the last one.
1098 * 4:Exchange the original inode data with the donor inode data
1099 *   from orig_page_offset to seq_end_page.
1100 *   The start indexes of the data are specified as arguments.
1101 *   That of the original inode is orig_page_offset,
1102 *   and that of the donor inode is also orig_page_offset
1103 *   (to easily handle the blocksize != pagesize case, the offset for the
1104 *   donor inode is kept in block units).
1105 * 5:Update holecheck_path and orig_path to point at the next extent,
1106 *   then return to step 2.
1107 * 6:Release holecheck_path and orig_path, and set len in moved_len,
1108 *   which is the number of moved blocks.
1109 *   moved_len lets the caller calculate the file offset at which to
1110 *   start the next move extent ioctl.
1111 * 7:Return 0 on success, or a negative error value on failure.
1112 */
1113int
1114ext4_move_extents(struct file *o_filp, struct file *d_filp,
1115 __u64 orig_start, __u64 donor_start, __u64 len,
1116 __u64 *moved_len)
1117{
1118 struct inode *orig_inode = o_filp->f_dentry->d_inode;
1119 struct inode *donor_inode = d_filp->f_dentry->d_inode;
1120 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1121 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1122 ext4_lblk_t block_start = orig_start;
1123 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1124 ext4_lblk_t rest_blocks;
1125 pgoff_t orig_page_offset = 0, seq_end_page;
1126 int ret, depth, last_extent = 0;
1127 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1128 int data_offset_in_page;
1129 int block_len_in_page;
1130 int uninit;
1131
1132 /* protect orig and donor against a truncate */
1133 mext_inode_double_lock(orig_inode, donor_inode);
1134
1135 mext_double_down_read(orig_inode, donor_inode);
1136 /* Check whether move_extent can be done on the given files */
1137 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1138 donor_start, &len, *moved_len);
1139 mext_double_up_read(orig_inode, donor_inode);
1140 if (ret)
1141 goto out2;
1142
1143 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
1144 block_end = block_start + len - 1;
1145 if (file_end < block_end)
1146 len -= block_end - file_end;
1147
1148 get_ext_path(orig_path, orig_inode, block_start, ret);
1149 if (orig_path == NULL)
1150 goto out2;
1151
1152 /* Get path structure to check the hole */
1153 get_ext_path(holecheck_path, orig_inode, block_start, ret);
1154 if (holecheck_path == NULL)
1155 goto out;
1156
1157 depth = ext_depth(orig_inode);
1158 ext_cur = holecheck_path[depth].p_ext;
1159 if (ext_cur == NULL) {
1160 ret = -EINVAL;
1161 goto out;
1162 }
1163
1164 /*
1165 * Get the proper extent whose ee_block is beyond block_start
1166 * if block_start was within a hole.
1167 */
1168 if (le32_to_cpu(ext_cur->ee_block) +
1169 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1170 last_extent = mext_next_extent(orig_inode,
1171 holecheck_path, &ext_cur);
1172 if (last_extent < 0) {
1173 ret = last_extent;
1174 goto out;
1175 }
1176 last_extent = mext_next_extent(orig_inode, orig_path,
1177 &ext_dummy);
1178 if (last_extent < 0) {
1179 ret = last_extent;
1180 goto out;
1181 }
1182 }
1183 seq_start = block_start;
1184
1185 /* No blocks within the specified range. */
1186 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1187 ext4_debug("ext4 move extent: The specified range of the file "
1188 "may be a hole\n");
1189 ret = -EINVAL;
1190 goto out;
1191 }
1192
1193 /* Adjust start blocks */
1194 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1195 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1196 max(le32_to_cpu(ext_cur->ee_block), block_start);
1197
1198 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1199 seq_blocks += add_blocks;
1200
1201 /* Adjust tail blocks */
1202 if (seq_start + seq_blocks - 1 > block_end)
1203 seq_blocks = block_end - seq_start + 1;
1204
1205 ext_prev = ext_cur;
1206 last_extent = mext_next_extent(orig_inode, holecheck_path,
1207 &ext_cur);
1208 if (last_extent < 0) {
1209 ret = last_extent;
1210 break;
1211 }
1212 add_blocks = ext4_ext_get_actual_len(ext_cur);
1213
1214 /*
1215 * Extend the contiguous block count (seq_blocks)
1216 * if the extents are contiguous.
1217 */
1218 if (ext4_can_extents_be_merged(orig_inode,
1219 ext_prev, ext_cur) &&
1220 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1221 !last_extent)
1222 continue;
1223
1224 /* Is the original extent uninitialized? */
1225 uninit = ext4_ext_is_uninitialized(ext_prev);
1226
1227 data_offset_in_page = seq_start % blocks_per_page;
1228
1229 /*
1230 * Calculate data blocks count that should be swapped
1231 * at the first page.
1232 */
1233 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1234 /* Swapped blocks are across pages */
1235 block_len_in_page =
1236 blocks_per_page - data_offset_in_page;
1237 } else {
1238 /* Swapped blocks are in a page */
1239 block_len_in_page = seq_blocks;
1240 }
1241
1242 orig_page_offset = seq_start >>
1243 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1244 seq_end_page = (seq_start + seq_blocks - 1) >>
1245 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1246 seq_start = le32_to_cpu(ext_cur->ee_block);
1247 rest_blocks = seq_blocks;
1248
1249 /* Discard preallocations of two inodes */
1250 down_write(&EXT4_I(orig_inode)->i_data_sem);
1251 ext4_discard_preallocations(orig_inode);
1252 up_write(&EXT4_I(orig_inode)->i_data_sem);
1253
1254 down_write(&EXT4_I(donor_inode)->i_data_sem);
1255 ext4_discard_preallocations(donor_inode);
1256 up_write(&EXT4_I(donor_inode)->i_data_sem);
1257
1258 while (orig_page_offset <= seq_end_page) {
1259
1260 /* Swap original branches with new branches */
1261 ret = move_extent_per_page(o_filp, donor_inode,
1262 orig_page_offset,
1263 data_offset_in_page,
1264 block_len_in_page, uninit);
1265 if (ret < 0)
1266 goto out;
1267 orig_page_offset++;
1268 /* Count how many blocks we have exchanged */
1269 *moved_len += block_len_in_page;
1270 BUG_ON(*moved_len > len);
1271
1272 data_offset_in_page = 0;
1273 rest_blocks -= block_len_in_page;
1274 if (rest_blocks > blocks_per_page)
1275 block_len_in_page = blocks_per_page;
1276 else
1277 block_len_in_page = rest_blocks;
1278 }
1279
1280 /* Decrease buffer counter */
1281 if (holecheck_path)
1282 ext4_ext_drop_refs(holecheck_path);
1283 get_ext_path(holecheck_path, orig_inode,
1284 seq_start, ret);
1285 if (holecheck_path == NULL)
1286 break;
1287 depth = holecheck_path->p_depth;
1288
1289 /* Decrease buffer counter */
1290 if (orig_path)
1291 ext4_ext_drop_refs(orig_path);
1292 get_ext_path(orig_path, orig_inode, seq_start, ret);
1293 if (orig_path == NULL)
1294 break;
1295
1296 ext_cur = holecheck_path[depth].p_ext;
1297 add_blocks = ext4_ext_get_actual_len(ext_cur);
1298 seq_blocks = 0;
1299
1300 }
1301out:
1302 if (orig_path) {
1303 ext4_ext_drop_refs(orig_path);
1304 kfree(orig_path);
1305 }
1306 if (holecheck_path) {
1307 ext4_ext_drop_refs(holecheck_path);
1308 kfree(holecheck_path);
1309 }
1310out2:
1311 mext_inode_double_unlock(orig_inode, donor_inode);
1312
1313 if (ret)
1314 return ret;
1315
1316 /* On success, all of the specified blocks must have been exchanged */
1317 BUG_ON(*moved_len != len);
1318
1319 return 0;
1320}
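
For context, a user-space caller of this path would look roughly like the hypothetical sketch below (not part of this patch). The EXT4_IOC_MOVE_EXT number and the struct move_extent layout are assumptions copied to user space for illustration and should be verified against fs/ext4/ext4.h in this tree; the donor file must already have blocks allocated across the requested range (e.g. via fallocate).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed to match struct move_extent in fs/ext4/ext4.h; verify before use. */
struct move_extent {
	uint32_t reserved;	/* should be zero */
	uint32_t donor_fd;	/* donor file descriptor */
	uint64_t orig_start;	/* logical start offset in blocks, orig file */
	uint64_t donor_start;	/* logical start offset in blocks, donor file */
	uint64_t len;		/* number of blocks to be moved */
	uint64_t moved_len;	/* number of blocks actually moved */
};
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)	/* assumed value */

int main(int argc, char **argv)
{
	if (argc < 3) {
		fprintf(stderr, "usage: %s <orig> <donor>\n", argv[0]);
		return 1;
	}

	int orig = open(argv[1], O_RDWR);	/* fragmented file */
	int donor = open(argv[2], O_RDWR);	/* contiguous, preallocated donor */
	if (orig < 0 || donor < 0) {
		perror("open");
		return 1;
	}

	struct move_extent me = {
		.donor_fd = donor,
		.orig_start = 0,
		.donor_start = 0,
		.len = 256,			/* blocks to exchange */
	};

	if (ioctl(orig, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);

	close(orig);
	close(donor);
	return 0;
}
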
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 07eb6649e4fa..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
1782 if (IS_DIRSYNC(dir)) 1782 if (IS_DIRSYNC(dir))
1783 ext4_handle_sync(handle); 1783 ext4_handle_sync(handle);
1784 1784
1785 inode = ext4_new_inode (handle, dir, mode); 1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786 err = PTR_ERR(inode); 1786 err = PTR_ERR(inode);
1787 if (!IS_ERR(inode)) { 1787 if (!IS_ERR(inode)) {
1788 inode->i_op = &ext4_file_inode_operations; 1788 inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
1816 if (IS_DIRSYNC(dir)) 1816 if (IS_DIRSYNC(dir))
1817 ext4_handle_sync(handle); 1817 ext4_handle_sync(handle);
1818 1818
1819 inode = ext4_new_inode(handle, dir, mode); 1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820 err = PTR_ERR(inode); 1820 err = PTR_ERR(inode);
1821 if (!IS_ERR(inode)) { 1821 if (!IS_ERR(inode)) {
1822 init_special_inode(inode, inode->i_mode, rdev); 1822 init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
1853 if (IS_DIRSYNC(dir)) 1853 if (IS_DIRSYNC(dir))
1854 ext4_handle_sync(handle); 1854 ext4_handle_sync(handle);
1855 1855
1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode); 1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857 &dentry->d_name, 0);
1857 err = PTR_ERR(inode); 1858 err = PTR_ERR(inode);
1858 if (IS_ERR(inode)) 1859 if (IS_ERR(inode))
1859 goto out_stop; 1860 goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
2264 if (IS_DIRSYNC(dir)) 2265 if (IS_DIRSYNC(dir))
2265 ext4_handle_sync(handle); 2266 ext4_handle_sync(handle);
2266 2267
2267 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); 2268 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2269 &dentry->d_name, 0);
2268 err = PTR_ERR(inode); 2270 err = PTR_ERR(inode);
2269 if (IS_ERR(inode)) 2271 if (IS_ERR(inode))
2270 goto out_stop; 2272 goto out_stop;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 27eb289eea37..68b0351fc647 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1002 " too large to resize to %llu blocks safely\n", 1002 " too large to resize to %llu blocks safely\n",
1003 sb->s_id, n_blocks_count); 1003 sb->s_id, n_blocks_count);
1004 if (sizeof(sector_t) < 8) 1004 if (sizeof(sector_t) < 8)
1005 ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); 1005 ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
1006 return -EINVAL; 1006 return -EINVAL;
1007 } 1007 }
1008 1008
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397e..8bb9e2d3e4b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
39#include <linux/ctype.h> 39#include <linux/ctype.h>
40#include <linux/marker.h>
41#include <linux/log2.h> 40#include <linux/log2.h>
42#include <linux/crc16.h> 41#include <linux/crc16.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -47,6 +46,9 @@
47#include "xattr.h" 46#include "xattr.h"
48#include "acl.h" 47#include "acl.h"
49 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h>
51
50static int default_mb_history_length = 1000; 52static int default_mb_history_length = 1000;
51 53
52module_param_named(default_mb_history_length, default_mb_history_length, 54module_param_named(default_mb_history_length, default_mb_history_length,
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
301 if (!test_opt(sb, ERRORS_CONT)) { 303 if (!test_opt(sb, ERRORS_CONT)) {
302 journal_t *journal = EXT4_SB(sb)->s_journal; 304 journal_t *journal = EXT4_SB(sb)->s_journal;
303 305
304 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 306 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
305 if (journal) 307 if (journal)
306 jbd2_journal_abort(journal, -EIO); 308 jbd2_journal_abort(journal, -EIO);
307 } 309 }
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 416 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 417 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
416 sb->s_flags |= MS_RDONLY; 418 sb->s_flags |= MS_RDONLY;
417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 419 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
418 if (EXT4_SB(sb)->s_journal) 420 if (EXT4_SB(sb)->s_journal)
419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 421 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
420} 422}
@@ -1474,7 +1476,7 @@ set_qf_format:
1474 break; 1476 break;
1475#endif 1477#endif
1476 case Opt_abort: 1478 case Opt_abort:
1477 set_opt(sbi->s_mount_opt, ABORT); 1479 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1478 break; 1480 break;
1479 case Opt_nobarrier: 1481 case Opt_nobarrier:
1480 clear_opt(sbi->s_mount_opt, BARRIER); 1482 clear_opt(sbi->s_mount_opt, BARRIER);
@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1653 ext4_commit_super(sb, 1); 1655 ext4_commit_super(sb, 1);
1654 if (test_opt(sb, DEBUG)) 1656 if (test_opt(sb, DEBUG))
1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1657 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1658 "bpg=%lu, ipg=%lu, mo=%04x]\n",
1657 sb->s_blocksize, 1659 sb->s_blocksize,
1658 sbi->s_groups_count, 1660 sbi->s_groups_count,
1659 EXT4_BLOCKS_PER_GROUP(sb), 1661 EXT4_BLOCKS_PER_GROUP(sb),
@@ -1957,7 +1959,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1957 /* small i_blocks in vfs inode? */ 1959 /* small i_blocks in vfs inode? */
1958 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1960 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1959 /* 1961 /*
1960 * CONFIG_LBD is not enabled implies the inode 1962 * CONFIG_LBDAF is not enabled implies the inode
1961 * i_block represent total blocks in 512 bytes 1963 * i_block represent total blocks in 512 bytes
1962 * 32 == size of vfs inode i_blocks * 8 1964 * 32 == size of vfs inode i_blocks * 8
1963 */ 1965 */
@@ -2000,7 +2002,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2000 2002
2001 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2003 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2002 /* 2004 /*
2003 * !has_huge_files or CONFIG_LBD not enabled implies that 2005 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2004 * the inode i_block field represents total file blocks in 2006 * the inode i_block field represents total file blocks in
2005 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2007 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2006 */ 2008 */
@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
2204EXT4_RO_ATTR(lifetime_write_kbytes); 2206EXT4_RO_ATTR(lifetime_write_kbytes);
2205EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2207EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2206 inode_readahead_blks_store, s_inode_readahead_blks); 2208 inode_readahead_blks_store, s_inode_readahead_blks);
2209EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2207EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2210EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2208EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2211EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2209EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2212EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
2216 ATTR_LIST(session_write_kbytes), 2219 ATTR_LIST(session_write_kbytes),
2217 ATTR_LIST(lifetime_write_kbytes), 2220 ATTR_LIST(lifetime_write_kbytes),
2218 ATTR_LIST(inode_readahead_blks), 2221 ATTR_LIST(inode_readahead_blks),
2222 ATTR_LIST(inode_goal),
2219 ATTR_LIST(mb_stats), 2223 ATTR_LIST(mb_stats),
2220 ATTR_LIST(mb_max_to_scan), 2224 ATTR_LIST(mb_max_to_scan),
2221 ATTR_LIST(mb_min_to_scan), 2225 ATTR_LIST(mb_min_to_scan),
@@ -2436,13 +2440,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2436 if (has_huge_files) { 2440 if (has_huge_files) {
2437 /* 2441 /*
2438 * Large file size enabled file system can only be 2442 * Large file size enabled file system can only be
2439 * mount if kernel is build with CONFIG_LBD 2443 * mount if kernel is build with CONFIG_LBDAF
2440 */ 2444 */
2441 if (sizeof(root->i_blocks) < sizeof(u64) && 2445 if (sizeof(root->i_blocks) < sizeof(u64) &&
2442 !(sb->s_flags & MS_RDONLY)) { 2446 !(sb->s_flags & MS_RDONLY)) {
2443 ext4_msg(sb, KERN_ERR, "Filesystem with huge " 2447 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2444 "files cannot be mounted read-write " 2448 "files cannot be mounted read-write "
2445 "without CONFIG_LBD"); 2449 "without CONFIG_LBDAF");
2446 goto failed_mount; 2450 goto failed_mount;
2447 } 2451 }
2448 } 2452 }
@@ -2566,7 +2570,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2566 ext4_msg(sb, KERN_ERR, "filesystem" 2570 ext4_msg(sb, KERN_ERR, "filesystem"
2567 " too large to mount safely"); 2571 " too large to mount safely");
2568 if (sizeof(sector_t) < 8) 2572 if (sizeof(sector_t) < 8)
2569 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); 2573 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
2570 goto failed_mount; 2574 goto failed_mount;
2571 } 2575 }
2572 2576
@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3346 int ret = 0; 3350 int ret = 0;
3347 tid_t target; 3351 tid_t target;
3348 3352
3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3353 trace_ext4_sync_fs(sb, wait);
3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 3354 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3351 if (wait) 3355 if (wait)
3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 3356 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3450 goto restore_opts; 3454 goto restore_opts;
3451 } 3455 }
3452 3456
3453 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3457 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3454 ext4_abort(sb, __func__, "Abort forced by user"); 3458 ext4_abort(sb, __func__, "Abort forced by user");
3455 3459
3456 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3460 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3465 3469
3466 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3470 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3467 n_blocks_count > ext4_blocks_count(es)) { 3471 n_blocks_count > ext4_blocks_count(es)) {
3468 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3472 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
3469 err = -EROFS; 3473 err = -EROFS;
3470 goto restore_opts; 3474 goto restore_opts;
3471 } 3475 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 304b411cb8bc..8970d8c49bb0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -966,7 +966,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
966 966
967 opts->fs_uid = current_uid(); 967 opts->fs_uid = current_uid();
968 opts->fs_gid = current_gid(); 968 opts->fs_gid = current_gid();
969 opts->fs_fmask = current_umask(); 969 opts->fs_fmask = opts->fs_dmask = current_umask();
970 opts->allow_utime = -1; 970 opts->allow_utime = -1;
971 opts->codepage = fat_default_codepage; 971 opts->codepage = fat_default_codepage;
972 opts->iocharset = fat_default_iocharset; 972 opts->iocharset = fat_default_iocharset;
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index cad957cdb1e5..5971359d2090 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL && (64BIT || LBD) 3 depends on EXPERIMENTAL && (64BIT || LBDAF)
4 select DLM if GFS2_FS_LOCKING_DLM 4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM 5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM 6 select SYSFS if GFS2_FS_LOCKING_DLM
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2f0dc5a14633..8ba5441063be 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -195,9 +195,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
195 * Do not report hidden files if so instructed, or associated 195 * Do not report hidden files if so instructed, or associated
196 * files unless instructed to do so 196 * files unless instructed to do so
197 */ 197 */
198 if ((sbi->s_hide == 'y' && 198 if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) ||
199 (de->flags[-sbi->s_high_sierra] & 1)) || 199 (!sbi->s_showassoc &&
200 (sbi->s_showassoc =='n' &&
201 (de->flags[-sbi->s_high_sierra] & 4))) { 200 (de->flags[-sbi->s_high_sierra] & 4))) {
202 filp->f_pos += de_len; 201 filp->f_pos += de_len;
203 continue; 202 continue;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 068b34b5a107..58a7963e168a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -141,13 +141,17 @@ static const struct dentry_operations isofs_dentry_ops[] = {
141}; 141};
142 142
143struct iso9660_options{ 143struct iso9660_options{
144 char map; 144 unsigned int rock:1;
145 char rock; 145 unsigned int cruft:1;
146 unsigned int hide:1;
147 unsigned int showassoc:1;
148 unsigned int nocompress:1;
149 unsigned int overriderockperm:1;
150 unsigned int uid_set:1;
151 unsigned int gid_set:1;
152 unsigned int utf8:1;
153 unsigned char map;
146 char joliet; 154 char joliet;
147 char cruft;
148 char hide;
149 char showassoc;
150 char nocompress;
151 unsigned char check; 155 unsigned char check;
152 unsigned int blocksize; 156 unsigned int blocksize;
153 mode_t fmode; 157 mode_t fmode;
@@ -155,7 +159,6 @@ struct iso9660_options{
155 gid_t gid; 159 gid_t gid;
156 uid_t uid; 160 uid_t uid;
157 char *iocharset; 161 char *iocharset;
158 unsigned char utf8;
159 /* LVE */ 162 /* LVE */
160 s32 session; 163 s32 session;
161 s32 sbsector; 164 s32 sbsector;
@@ -312,7 +315,7 @@ enum {
312 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, 315 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore,
313 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, 316 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet,
314 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, 317 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err,
315 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, 318 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, Opt_overriderockperm,
316}; 319};
317 320
318static const match_table_t tokens = { 321static const match_table_t tokens = {
@@ -340,6 +343,7 @@ static const match_table_t tokens = {
340 {Opt_gid, "gid=%u"}, 343 {Opt_gid, "gid=%u"},
341 {Opt_mode, "mode=%u"}, 344 {Opt_mode, "mode=%u"},
342 {Opt_dmode, "dmode=%u"}, 345 {Opt_dmode, "dmode=%u"},
346 {Opt_overriderockperm, "overriderockperm"},
343 {Opt_block, "block=%u"}, 347 {Opt_block, "block=%u"},
344 {Opt_ignore, "conv=binary"}, 348 {Opt_ignore, "conv=binary"},
345 {Opt_ignore, "conv=b"}, 349 {Opt_ignore, "conv=b"},
@@ -359,24 +363,22 @@ static int parse_options(char *options, struct iso9660_options *popt)
359 int option; 363 int option;
360 364
361 popt->map = 'n'; 365 popt->map = 'n';
362 popt->rock = 'y'; 366 popt->rock = 1;
363 popt->joliet = 'y'; 367 popt->joliet = 1;
364 popt->cruft = 'n'; 368 popt->cruft = 0;
365 popt->hide = 'n'; 369 popt->hide = 0;
366 popt->showassoc = 'n'; 370 popt->showassoc = 0;
367 popt->check = 'u'; /* unset */ 371 popt->check = 'u'; /* unset */
368 popt->nocompress = 0; 372 popt->nocompress = 0;
369 popt->blocksize = 1024; 373 popt->blocksize = 1024;
370 popt->fmode = popt->dmode = S_IRUGO | S_IXUGO; /* 374 popt->fmode = popt->dmode = ISOFS_INVALID_MODE;
371 * r-x for all. The disc could 375 popt->uid_set = 0;
372 * be shared with DOS machines so 376 popt->gid_set = 0;
373 * virtually anything could be
374 * a valid executable.
375 */
376 popt->gid = 0; 377 popt->gid = 0;
377 popt->uid = 0; 378 popt->uid = 0;
378 popt->iocharset = NULL; 379 popt->iocharset = NULL;
379 popt->utf8 = 0; 380 popt->utf8 = 0;
381 popt->overriderockperm = 0;
380 popt->session=-1; 382 popt->session=-1;
381 popt->sbsector=-1; 383 popt->sbsector=-1;
382 if (!options) 384 if (!options)
@@ -393,20 +395,20 @@ static int parse_options(char *options, struct iso9660_options *popt)
393 token = match_token(p, tokens, args); 395 token = match_token(p, tokens, args);
394 switch (token) { 396 switch (token) {
395 case Opt_norock: 397 case Opt_norock:
396 popt->rock = 'n'; 398 popt->rock = 0;
397 break; 399 break;
398 case Opt_nojoliet: 400 case Opt_nojoliet:
399 popt->joliet = 'n'; 401 popt->joliet = 0;
400 break; 402 break;
401 case Opt_hide: 403 case Opt_hide:
402 popt->hide = 'y'; 404 popt->hide = 1;
403 break; 405 break;
404 case Opt_unhide: 406 case Opt_unhide:
405 case Opt_showassoc: 407 case Opt_showassoc:
406 popt->showassoc = 'y'; 408 popt->showassoc = 1;
407 break; 409 break;
408 case Opt_cruft: 410 case Opt_cruft:
409 popt->cruft = 'y'; 411 popt->cruft = 1;
410 break; 412 break;
411 case Opt_utf8: 413 case Opt_utf8:
412 popt->utf8 = 1; 414 popt->utf8 = 1;
@@ -450,11 +452,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
450 if (match_int(&args[0], &option)) 452 if (match_int(&args[0], &option))
451 return 0; 453 return 0;
452 popt->uid = option; 454 popt->uid = option;
455 popt->uid_set = 1;
453 break; 456 break;
454 case Opt_gid: 457 case Opt_gid:
455 if (match_int(&args[0], &option)) 458 if (match_int(&args[0], &option))
456 return 0; 459 return 0;
457 popt->gid = option; 460 popt->gid = option;
461 popt->gid_set = 1;
458 break; 462 break;
459 case Opt_mode: 463 case Opt_mode:
460 if (match_int(&args[0], &option)) 464 if (match_int(&args[0], &option))
@@ -466,6 +470,9 @@ static int parse_options(char *options, struct iso9660_options *popt)
466 return 0; 470 return 0;
467 popt->dmode = option; 471 popt->dmode = option;
468 break; 472 break;
473 case Opt_overriderockperm:
474 popt->overriderockperm = 1;
475 break;
469 case Opt_block: 476 case Opt_block:
470 if (match_int(&args[0], &option)) 477 if (match_int(&args[0], &option))
471 return 0; 478 return 0;
@@ -650,7 +657,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
650 goto out_freebh; 657 goto out_freebh;
651 658
652 sbi->s_high_sierra = 1; 659 sbi->s_high_sierra = 1;
653 opt.rock = 'n'; 660 opt.rock = 0;
654 h_pri = (struct hs_primary_descriptor *)vdp; 661 h_pri = (struct hs_primary_descriptor *)vdp;
655 goto root_found; 662 goto root_found;
656 } 663 }
@@ -673,7 +680,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
673 680
674root_found: 681root_found:
675 682
676 if (joliet_level && (pri == NULL || opt.rock == 'n')) { 683 if (joliet_level && (pri == NULL || !opt.rock)) {
677 /* This is the case of Joliet with the norock mount flag. 684 /* This is the case of Joliet with the norock mount flag.
678 * A disc with both Joliet and Rock Ridge is handled later 685 * A disc with both Joliet and Rock Ridge is handled later
679 */ 686 */
@@ -802,22 +809,31 @@ root_found:
802 s->s_op = &isofs_sops; 809 s->s_op = &isofs_sops;
803 s->s_export_op = &isofs_export_ops; 810 s->s_export_op = &isofs_export_ops;
804 sbi->s_mapping = opt.map; 811 sbi->s_mapping = opt.map;
805 sbi->s_rock = (opt.rock == 'y' ? 2 : 0); 812 sbi->s_rock = (opt.rock ? 2 : 0);
806 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ 813 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/
807 sbi->s_cruft = opt.cruft; 814 sbi->s_cruft = opt.cruft;
808 sbi->s_hide = opt.hide; 815 sbi->s_hide = opt.hide;
809 sbi->s_showassoc = opt.showassoc; 816 sbi->s_showassoc = opt.showassoc;
810 sbi->s_uid = opt.uid; 817 sbi->s_uid = opt.uid;
811 sbi->s_gid = opt.gid; 818 sbi->s_gid = opt.gid;
819 sbi->s_uid_set = opt.uid_set;
820 sbi->s_gid_set = opt.gid_set;
812 sbi->s_utf8 = opt.utf8; 821 sbi->s_utf8 = opt.utf8;
813 sbi->s_nocompress = opt.nocompress; 822 sbi->s_nocompress = opt.nocompress;
823 sbi->s_overriderockperm = opt.overriderockperm;
814 /* 824 /*
815 * It would be incredibly stupid to allow people to mark every file 825 * It would be incredibly stupid to allow people to mark every file
816 * on the disk as suid, so we merely allow them to set the default 826 * on the disk as suid, so we merely allow them to set the default
817 * permissions. 827 * permissions.
818 */ 828 */
819 sbi->s_fmode = opt.fmode & 0777; 829 if (opt.fmode != ISOFS_INVALID_MODE)
820 sbi->s_dmode = opt.dmode & 0777; 830 sbi->s_fmode = opt.fmode & 0777;
831 else
832 sbi->s_fmode = ISOFS_INVALID_MODE;
833 if (opt.dmode != ISOFS_INVALID_MODE)
834 sbi->s_dmode = opt.dmode & 0777;
835 else
836 sbi->s_dmode = ISOFS_INVALID_MODE;
821 837
822 /* 838 /*
823 * Read the root inode, which _may_ result in changing 839 * Read the root inode, which _may_ result in changing
@@ -1095,18 +1111,6 @@ static const struct address_space_operations isofs_aops = {
1095 .bmap = _isofs_bmap 1111 .bmap = _isofs_bmap
1096}; 1112};
1097 1113
1098static inline void test_and_set_uid(uid_t *p, uid_t value)
1099{
1100 if (value)
1101 *p = value;
1102}
1103
1104static inline void test_and_set_gid(gid_t *p, gid_t value)
1105{
1106 if (value)
1107 *p = value;
1108}
1109
1110static int isofs_read_level3_size(struct inode *inode) 1114static int isofs_read_level3_size(struct inode *inode)
1111{ 1115{
1112 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 1116 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
@@ -1261,7 +1265,10 @@ static int isofs_read_inode(struct inode *inode)
1261 ei->i_file_format = isofs_file_normal; 1265 ei->i_file_format = isofs_file_normal;
1262 1266
1263 if (de->flags[-high_sierra] & 2) { 1267 if (de->flags[-high_sierra] & 2) {
1264 inode->i_mode = sbi->s_dmode | S_IFDIR; 1268 if (sbi->s_dmode != ISOFS_INVALID_MODE)
1269 inode->i_mode = S_IFDIR | sbi->s_dmode;
1270 else
1271 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1265 inode->i_nlink = 1; /* 1272 inode->i_nlink = 1; /*
1266 * Set to 1. We know there are 2, but 1273 * Set to 1. We know there are 2, but
1267 * the find utility tries to optimize 1274 * the find utility tries to optimize
@@ -1270,8 +1277,16 @@ static int isofs_read_inode(struct inode *inode)
1270 * do it the hard way. 1277 * do it the hard way.
1271 */ 1278 */
1272 } else { 1279 } else {
1273 /* Everybody gets to read the file. */ 1280 if (sbi->s_fmode != ISOFS_INVALID_MODE) {
1274 inode->i_mode = sbi->s_fmode | S_IFREG; 1281 inode->i_mode = S_IFREG | sbi->s_fmode;
1282 } else {
1283 /*
1284 * Set default permissions: r-x for all. The disc
1285 * could be shared with DOS machines so virtually
1286 * anything could be a valid executable.
1287 */
1288 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1289 }
1275 inode->i_nlink = 1; 1290 inode->i_nlink = 1;
1276 } 1291 }
1277 inode->i_uid = sbi->s_uid; 1292 inode->i_uid = sbi->s_uid;
@@ -1300,7 +1315,7 @@ static int isofs_read_inode(struct inode *inode)
1300 * this CDROM was mounted with the cruft option. 1315 * this CDROM was mounted with the cruft option.
1301 */ 1316 */
1302 1317
1303 if (sbi->s_cruft == 'y') 1318 if (sbi->s_cruft)
1304 inode->i_size &= 0x00ffffff; 1319 inode->i_size &= 0x00ffffff;
1305 1320
1306 if (de->interleave[0]) { 1321 if (de->interleave[0]) {
@@ -1346,9 +1361,18 @@ static int isofs_read_inode(struct inode *inode)
1346 if (!high_sierra) { 1361 if (!high_sierra) {
1347 parse_rock_ridge_inode(de, inode); 1362 parse_rock_ridge_inode(de, inode);
1348 /* if we want uid/gid set, override the rock ridge setting */ 1363 /* if we want uid/gid set, override the rock ridge setting */
1349 test_and_set_uid(&inode->i_uid, sbi->s_uid); 1364 if (sbi->s_uid_set)
1350 test_and_set_gid(&inode->i_gid, sbi->s_gid); 1365 inode->i_uid = sbi->s_uid;
1366 if (sbi->s_gid_set)
1367 inode->i_gid = sbi->s_gid;
1351 } 1368 }
1369 /* Now set final access rights if overriding rock ridge setting */
1370 if (S_ISDIR(inode->i_mode) && sbi->s_overriderockperm &&
1371 sbi->s_dmode != ISOFS_INVALID_MODE)
1372 inode->i_mode = S_IFDIR | sbi->s_dmode;
1373 if (S_ISREG(inode->i_mode) && sbi->s_overriderockperm &&
1374 sbi->s_fmode != ISOFS_INVALID_MODE)
1375 inode->i_mode = S_IFREG | sbi->s_fmode;
1352 1376
1353 /* Install the inode operations vector */ 1377 /* Install the inode operations vector */
1354 if (S_ISREG(inode->i_mode)) { 1378 if (S_ISREG(inode->i_mode)) {
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index ccbf72faf27a..7d33de84f52a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -35,21 +35,20 @@ struct isofs_sb_info {
35 unsigned long s_log_zone_size; 35 unsigned long s_log_zone_size;
36 unsigned long s_max_size; 36 unsigned long s_max_size;
37 37
38 unsigned char s_high_sierra; /* A simple flag */
39 unsigned char s_mapping;
40 int s_rock_offset; /* offset of SUSP fields within SU area */ 38 int s_rock_offset; /* offset of SUSP fields within SU area */
41 unsigned char s_rock;
42 unsigned char s_joliet_level; 39 unsigned char s_joliet_level;
43 unsigned char s_utf8; 40 unsigned char s_mapping;
44 unsigned char s_cruft; /* Broken disks with high 41 unsigned int s_high_sierra:1;
45 byte of length containing 42 unsigned int s_rock:2;
46 junk */ 43 unsigned int s_utf8:1;
47 unsigned char s_unhide; 44 unsigned int s_cruft:1; /* Broken disks with high byte of length
48 unsigned char s_nosuid; 45 * containing junk */
49 unsigned char s_nodev; 46 unsigned int s_nocompress:1;
50 unsigned char s_nocompress; 47 unsigned int s_hide:1;
51 unsigned char s_hide; 48 unsigned int s_showassoc:1;
52 unsigned char s_showassoc; 49 unsigned int s_overriderockperm:1;
50 unsigned int s_uid_set:1;
51 unsigned int s_gid_set:1;
53 52
54 mode_t s_fmode; 53 mode_t s_fmode;
55 mode_t s_dmode; 54 mode_t s_dmode;
@@ -58,6 +57,8 @@ struct isofs_sb_info {
58 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
59}; 58};
60 59
60#define ISOFS_INVALID_MODE ((mode_t) -1)
61
61static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) 62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
62{ 63{
63 return sb->s_fs_info; 64 return sb->s_fs_info;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 8299889a835e..eaa831311c9c 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -142,9 +142,9 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
142 */ 142 */
143 match = 0; 143 match = 0;
144 if (dlen > 0 && 144 if (dlen > 0 &&
145 (sbi->s_hide =='n' || 145 (!sbi->s_hide ||
146 (!(de->flags[-sbi->s_high_sierra] & 1))) && 146 (!(de->flags[-sbi->s_high_sierra] & 1))) &&
147 (sbi->s_showassoc =='y' || 147 (sbi->s_showassoc ||
148 (!(de->flags[-sbi->s_high_sierra] & 4)))) { 148 (!(de->flags[-sbi->s_high_sierra] & 4)))) {
149 match = (isofs_cmp(dentry, dpnt, dlen) == 0); 149 match = (isofs_cmp(dentry, dpnt, dlen) == 0);
150 } 150 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index ed886e6db399..73242ba7c7b1 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1686,35 +1686,6 @@ out:
1686 return; 1686 return;
1687} 1687}
1688 1688
1689/*
1690 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1691 * The latter might still hold the a count on buffers when inspecting
1692 * them on t_syncdata_list or t_locked_list.
1693 *
1694 * journal_try_to_free_buffers() will call this function to
1695 * wait for the current transaction to finish syncing data buffers, before
1696 * tryinf to free that buffer.
1697 *
1698 * Called with journal->j_state_lock held.
1699 */
1700static void journal_wait_for_transaction_sync_data(journal_t *journal)
1701{
1702 transaction_t *transaction = NULL;
1703 tid_t tid;
1704
1705 spin_lock(&journal->j_state_lock);
1706 transaction = journal->j_committing_transaction;
1707
1708 if (!transaction) {
1709 spin_unlock(&journal->j_state_lock);
1710 return;
1711 }
1712
1713 tid = transaction->t_tid;
1714 spin_unlock(&journal->j_state_lock);
1715 log_wait_commit(journal, tid);
1716}
1717
1718/** 1689/**
1719 * int journal_try_to_free_buffers() - try to free page buffers. 1690 * int journal_try_to_free_buffers() - try to free page buffers.
1720 * @journal: journal for operation 1691 * @journal: journal for operation
@@ -1786,25 +1757,6 @@ int journal_try_to_free_buffers(journal_t *journal,
1786 1757
1787 ret = try_to_free_buffers(page); 1758 ret = try_to_free_buffers(page);
1788 1759
1789 /*
1790 * There are a number of places where journal_try_to_free_buffers()
1791 * could race with journal_commit_transaction(), the later still
1792 * holds the reference to the buffers to free while processing them.
1793 * try_to_free_buffers() failed to free those buffers. Some of the
1794 * caller of releasepage() request page buffers to be dropped, otherwise
1795 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1796 *
1797 * So, if the caller of try_to_release_page() wants the synchronous
1798 * behaviour(i.e make sure buffers are dropped upon return),
1799 * let's wait for the current transaction to finish flush of
1800 * dirty data buffers, then try to free those buffers again,
1801 * with the journal locked.
1802 */
1803 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1804 journal_wait_for_transaction_sync_data(journal);
1805 ret = try_to_free_buffers(page);
1806 }
1807
1808busy: 1760busy:
1809 return ret; 1761 return ret;
1810} 1762}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9e..5d70b3e6d49b 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/marker.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
25#include <trace/events/jbd2.h>
26 26
27/* 27/*
28 * Unlink a buffer from a transaction checkpoint list. 28 * Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
358 * journal straight away. 358 * journal straight away.
359 */ 359 */
360 result = jbd2_cleanup_journal_tail(journal); 360 result = jbd2_cleanup_journal_tail(journal);
361 trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", 361 trace_jbd2_checkpoint(journal, result);
362 journal->j_devname, result);
363 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 362 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
364 if (result <= 0) 363 if (result <= 0)
365 return result; 364 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226fd..7b4088b2364d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd2.h> 18#include <linux/jbd2.h>
19#include <linux/marker.h>
20#include <linux/errno.h> 19#include <linux/errno.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/mm.h> 21#include <linux/mm.h>
@@ -26,6 +25,7 @@
26#include <linux/writeback.h> 25#include <linux/writeback.h>
27#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
28#include <linux/bio.h> 27#include <linux/bio.h>
28#include <trace/events/jbd2.h>
29 29
30/* 30/*
31 * Default IO end handler for temporary BJ_IO buffer_heads. 31 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
253 * block allocation with delalloc. We need to write 253 * block allocation with delalloc. We need to write
254 * only allocated blocks here. 254 * only allocated blocks here.
255 */ 255 */
256 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
256 err = journal_submit_inode_data_buffers(mapping); 257 err = journal_submit_inode_data_buffers(mapping);
257 if (!ret) 258 if (!ret)
258 ret = err; 259 ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
394 commit_transaction = journal->j_running_transaction; 395 commit_transaction = journal->j_running_transaction;
395 J_ASSERT(commit_transaction->t_state == T_RUNNING); 396 J_ASSERT(commit_transaction->t_state == T_RUNNING);
396 397
397 trace_mark(jbd2_start_commit, "dev %s transaction %d", 398 trace_jbd2_start_commit(journal, commit_transaction);
398 journal->j_devname, commit_transaction->t_tid);
399 jbd_debug(1, "JBD: starting commit of transaction %d\n", 399 jbd_debug(1, "JBD: starting commit of transaction %d\n",
400 commit_transaction->t_tid); 400 commit_transaction->t_tid);
401 401
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
409 */ 409 */
410 if (commit_transaction->t_synchronous_commit) 410 if (commit_transaction->t_synchronous_commit)
411 write_op = WRITE_SYNC_PLUG; 411 write_op = WRITE_SYNC_PLUG;
412 trace_jbd2_commit_locking(journal, commit_transaction);
412 stats.u.run.rs_wait = commit_transaction->t_max_wait; 413 stats.u.run.rs_wait = commit_transaction->t_max_wait;
413 stats.u.run.rs_locked = jiffies; 414 stats.u.run.rs_locked = jiffies;
414 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 415 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
484 */ 485 */
485 jbd2_journal_switch_revoke_table(journal); 486 jbd2_journal_switch_revoke_table(journal);
486 487
488 trace_jbd2_commit_flushing(journal, commit_transaction);
487 stats.u.run.rs_flushing = jiffies; 489 stats.u.run.rs_flushing = jiffies;
488 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, 490 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
489 stats.u.run.rs_flushing); 491 stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 commit_transaction->t_state = T_COMMIT; 522 commit_transaction->t_state = T_COMMIT;
521 spin_unlock(&journal->j_state_lock); 523 spin_unlock(&journal->j_state_lock);
522 524
525 trace_jbd2_commit_logging(journal, commit_transaction);
523 stats.u.run.rs_logging = jiffies; 526 stats.u.run.rs_logging = jiffies;
524 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 527 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
525 stats.u.run.rs_logging); 528 stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
1054 if (journal->j_commit_callback) 1057 if (journal->j_commit_callback)
1055 journal->j_commit_callback(journal, commit_transaction); 1058 journal->j_commit_callback(journal, commit_transaction);
1056 1059
1057 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1060 trace_jbd2_end_commit(journal, commit_transaction);
1058 journal->j_devname, commit_transaction->t_tid,
1059 journal->j_tail_sequence);
1060 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1061 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1061 journal->j_commit_sequence, journal->j_tail_sequence); 1062 journal->j_commit_sequence, journal->j_tail_sequence);
1062 if (to_free) 1063 if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec2..18bfd5dab642 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h> 40#include <linux/math64.h>
41#include <linux/hash.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/jbd2.h>
41 45
42#include <asm/uaccess.h> 46#include <asm/uaccess.h>
43#include <asm/page.h> 47#include <asm/page.h>
@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
2377 jbd2_journal_destroy_caches(); 2381 jbd2_journal_destroy_caches();
2378} 2382}
2379 2383
2384/*
2385 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
2386 * tracing infrastructure to map a dev_t to a device name.
2387 *
2388 * The caller should use rcu_read_lock() in order to make sure the
 2389 * device name stays valid until it's done with it. We use
2390 * rcu_read_lock() as well to make sure we're safe in case the caller
2391 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
2392 * nested.
2393 */
2394struct devname_cache {
2395 struct rcu_head rcu;
2396 dev_t device;
2397 char devname[BDEVNAME_SIZE];
2398};
2399#define CACHE_SIZE_BITS 6
2400static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
2401static DEFINE_SPINLOCK(devname_cache_lock);
2402
2403static void free_devcache(struct rcu_head *rcu)
2404{
2405 kfree(rcu);
2406}
2407
2408const char *jbd2_dev_to_name(dev_t device)
2409{
2410 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret;
2412 struct block_device *bd;
2413
2414 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) {
2416 ret = devcache[i]->devname;
2417 rcu_read_unlock();
2418 return ret;
2419 }
2420 rcu_read_unlock();
2421
2422 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) {
2424 if (devcache[i]->device == device) {
2425 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock);
2427 return ret;
2428 }
2429 call_rcu(&devcache[i]->rcu, free_devcache);
2430 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device;
2437 bd = bdget(device);
2438 if (bd) {
2439 bdevname(bd, devcache[i]->devname);
2440 bdput(bd);
2441 } else
2442 __bdevname(device, devcache[i]->devname);
2443 ret = devcache[i]->devname;
2444 spin_unlock(&devname_cache_lock);
2445 return ret;
2446}
2447EXPORT_SYMBOL(jbd2_dev_to_name);
2448
2380MODULE_LICENSE("GPL"); 2449MODULE_LICENSE("GPL");
2381module_init(journal_init); 2450module_init(journal_init);
2382module_exit(journal_exit); 2451module_exit(journal_exit);
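
The comment above spells out the RCU contract for jbd2_dev_to_name(): the returned string only stays valid while the caller holds rcu_read_lock(). A minimal sketch of a conforming caller follows; it is not part of this patch, and the helper function and printk message are illustrative only.

#include <linux/jbd2.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>

/* Hypothetical caller: print the name of the device backing a journal,
 * keeping the RCU read lock held for as long as the string returned by
 * jbd2_dev_to_name() is in use. */
static void example_print_journal_dev(journal_t *journal)
{
	rcu_read_lock();
	printk(KERN_DEBUG "journal lives on %s\n",
	       jbd2_dev_to_name(journal->j_fs_dev->bd_dev));
	rcu_read_unlock();
}
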
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 996ffda06bf3..494501edba6b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1547,36 +1547,6 @@ out:
1547 return; 1547 return;
1548} 1548}
1549 1549
1550/*
1551 * jbd2_journal_try_to_free_buffers() could race with
1552 * jbd2_journal_commit_transaction(). The later might still hold the
1553 * reference count to the buffers when inspecting them on
1554 * t_syncdata_list or t_locked_list.
1555 *
1556 * jbd2_journal_try_to_free_buffers() will call this function to
1557 * wait for the current transaction to finish syncing data buffers, before
1558 * try to free that buffer.
1559 *
1560 * Called with journal->j_state_lock hold.
1561 */
1562static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1563{
1564 transaction_t *transaction;
1565 tid_t tid;
1566
1567 spin_lock(&journal->j_state_lock);
1568 transaction = journal->j_committing_transaction;
1569
1570 if (!transaction) {
1571 spin_unlock(&journal->j_state_lock);
1572 return;
1573 }
1574
1575 tid = transaction->t_tid;
1576 spin_unlock(&journal->j_state_lock);
1577 jbd2_log_wait_commit(journal, tid);
1578}
1579
1580/** 1550/**
1581 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1551 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1582 * @journal: journal for operation 1552 * @journal: journal for operation
@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1649 1619
1650 ret = try_to_free_buffers(page); 1620 ret = try_to_free_buffers(page);
1651 1621
1652 /*
1653 * There are a number of places where jbd2_journal_try_to_free_buffers()
1654 * could race with jbd2_journal_commit_transaction(), the later still
1655 * holds the reference to the buffers to free while processing them.
1656 * try_to_free_buffers() failed to free those buffers. Some of the
1657 * caller of releasepage() request page buffers to be dropped, otherwise
1658 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1659 *
1660 * So, if the caller of try_to_release_page() wants the synchronous
1661 * behaviour(i.e make sure buffers are dropped upon return),
1662 * let's wait for the current transaction to finish flush of
1663 * dirty data buffers, then try to free those buffers again,
1664 * with the journal locked.
1665 */
1666 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1667 jbd2_journal_wait_for_transaction_sync_data(journal);
1668 ret = try_to_free_buffers(page);
1669 }
1670
1671busy: 1622busy:
1672 return ret; 1623 return ret;
1673} 1624}
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ea2605a58b8a..f234f3a4c8ca 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -15,7 +15,8 @@ struct inotify_inode_mark_entry {
15 int wd; 15 int wd;
16}; 16};
17 17
18extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); 18extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
19 struct fsnotify_group *group);
19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 20extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
20 21
21extern const struct fsnotify_ops inotify_fsnotify_ops; 22extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 7ef75b83247e..47cd258fd24d 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -81,7 +81,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
81 81
82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
83{ 83{
84 inotify_destroy_mark_entry(entry, group); 84 inotify_ignored_and_remove_idr(entry, group);
85} 85}
86 86
87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) 87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 982a412ac5bc..ff231ad23895 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -363,39 +363,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
363} 363}
364 364
365/* 365/*
366 * When, for whatever reason, inotify is done with a mark (or what used to be a 366 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
367 * watch) we need to remove that watch from the idr and we need to send IN_IGNORED 367 * internal reference help on the mark because it is in the idr.
368 * for the given wd.
369 *
370 * There is a bit of recursion here. The loop looks like:
371 * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
372 * inotify_freeing_mark -> inotify_destory_mark_entry -> restart
373 * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets
374 * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
375 * test below will not call back to fsnotify again. But even if that test wasn't
376 * there this would still be safe since fsnotify_destroy_mark_by_entry() is
377 * safe from recursion.
378 */ 368 */
379void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 369void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
370 struct fsnotify_group *group)
380{ 371{
381 struct inotify_inode_mark_entry *ientry; 372 struct inotify_inode_mark_entry *ientry;
382 struct inotify_event_private_data *event_priv; 373 struct inotify_event_private_data *event_priv;
383 struct fsnotify_event_private_data *fsn_event_priv; 374 struct fsnotify_event_private_data *fsn_event_priv;
384 struct fsnotify_group *egroup;
385 struct idr *idr; 375 struct idr *idr;
386 376
387 spin_lock(&entry->lock);
388 egroup = entry->group;
389
390 /* if egroup we aren't really done and something might still send events
391 * for this inode, on the callback we'll send the IN_IGNORED */
392 if (egroup) {
393 spin_unlock(&entry->lock);
394 fsnotify_destroy_mark_by_entry(entry);
395 return;
396 }
397 spin_unlock(&entry->lock);
398
399 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 377 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
400 378
401 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 379 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
@@ -699,7 +677,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
699 fsnotify_get_mark(entry); 677 fsnotify_get_mark(entry);
700 spin_unlock(&group->inotify_data.idr_lock); 678 spin_unlock(&group->inotify_data.idr_lock);
701 679
702 inotify_destroy_mark_entry(entry, group); 680 fsnotify_destroy_mark_by_entry(entry);
703 fsnotify_put_mark(entry); 681 fsnotify_put_mark(entry);
704 682
705out: 683out:
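
For reference, here is a hedged sketch (not part of the patch) of the teardown order inotify_rm_watch() now uses, built only from calls visible in the hunks above. IN_IGNORED is generated indirectly: the generic destroy path invokes the group's ->freeing_mark callback, which for inotify is inotify_ignored_and_remove_idr().

#include <linux/fsnotify_backend.h>

/* Illustrative only: how a watch is torn down after this change. */
static void example_remove_watch(struct fsnotify_mark_entry *entry)
{
	fsnotify_get_mark(entry);               /* pin the mark across the destroy */
	fsnotify_destroy_mark_by_entry(entry);  /* ->freeing_mark sends IN_IGNORED
	                                         * and drops the idr reference */
	fsnotify_put_mark(entry);               /* drop our pin */
}
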
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d33767f17ba3..0d3ed7407a04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -552,7 +552,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
552 */ 552 */
553 553
554#if BITS_PER_LONG == 32 554#if BITS_PER_LONG == 32
555# if defined(CONFIG_LBD) 555# if defined(CONFIG_LBDAF)
556 BUILD_BUG_ON(sizeof(sector_t) != 8); 556 BUILD_BUG_ON(sizeof(sector_t) != 8);
557 /* 557 /*
558 * We might be limited by page cache size. 558 * We might be limited by page cache size.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 63d965193b22..11a7b5c68153 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -18,6 +18,7 @@ proc-y += meminfo.o
18proc-y += stat.o 18proc-y += stat.o
19proc-y += uptime.o 19proc-y += uptime.o
20proc-y += version.o 20proc-y += version.o
21proc-y += softirqs.o
21proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 22proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
22proc-$(CONFIG_NET) += proc_net.o 23proc-$(CONFIG_NET) += proc_net.o
23proc-$(CONFIG_PROC_KCORE) += kcore.o 24proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index fc6c3025befd..7ba79a54948c 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -195,20 +195,20 @@ void proc_device_tree_add_node(struct device_node *np,
195 p = fixup_name(np, de, p); 195 p = fixup_name(np, de, p);
196 196
197 ent = proc_mkdir(p, de); 197 ent = proc_mkdir(p, de);
198 if (ent == 0) 198 if (ent == NULL)
199 break; 199 break;
200 proc_device_tree_add_node(child, ent); 200 proc_device_tree_add_node(child, ent);
201 } 201 }
202 of_node_put(child); 202 of_node_put(child);
203 203
204 for (pp = np->properties; pp != 0; pp = pp->next) { 204 for (pp = np->properties; pp != NULL; pp = pp->next) {
205 p = pp->name; 205 p = pp->name;
206 206
207 if (duplicate_name(de, p)) 207 if (duplicate_name(de, p))
208 p = fixup_name(np, de, p); 208 p = fixup_name(np, de, p);
209 209
210 ent = __proc_device_tree_add_prop(de, pp, p); 210 ent = __proc_device_tree_add_prop(de, pp, p);
211 if (ent == 0) 211 if (ent == NULL)
212 break; 212 break;
213 } 213 }
214} 214}
@@ -221,10 +221,10 @@ void __init proc_device_tree_init(void)
221 struct device_node *root; 221 struct device_node *root;
222 222
223 proc_device_tree = proc_mkdir("device-tree", NULL); 223 proc_device_tree = proc_mkdir("device-tree", NULL);
224 if (proc_device_tree == 0) 224 if (proc_device_tree == NULL)
225 return; 225 return;
226 root = of_find_node_by_path("/"); 226 root = of_find_node_by_path("/");
227 if (root == 0) { 227 if (root == NULL) {
228 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 228 printk(KERN_ERR "/proc/device-tree: can't find root\n");
229 return; 229 return;
230 } 230 }
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
new file mode 100644
index 000000000000..1807c2419f17
--- /dev/null
+++ b/fs/proc/softirqs.c
@@ -0,0 +1,44 @@
1#include <linux/init.h>
2#include <linux/kernel_stat.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6/*
7 * /proc/softirqs ... display the number of softirqs
8 */
9static int show_softirqs(struct seq_file *p, void *v)
10{
11 int i, j;
12
13 seq_printf(p, " ");
14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n");
17
18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%8s:", softirq_to_name[i]);
20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n");
23 }
24 return 0;
25}
26
27static int softirqs_open(struct inode *inode, struct file *file)
28{
29 return single_open(file, show_softirqs, NULL);
30}
31
32static const struct file_operations proc_softirqs_operations = {
33 .open = softirqs_open,
34 .read = seq_read,
35 .llseek = seq_lseek,
36 .release = single_release,
37};
38
39static int __init proc_softirqs_init(void)
40{
41 proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
42 return 0;
43}
44module_init(proc_softirqs_init);
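
The layout of the new file follows directly from the seq_printf() format strings above: a header row of CPU columns, then one row per softirq with a right-aligned name and one counter per possible CPU. A small userspace sketch (not part of the patch, illustration only) that simply dumps it:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/softirqs", "r");
	char line[512];

	if (!f) {
		perror("/proc/softirqs");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
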
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81e4eb60972e..7cc726c6d70a 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -29,6 +29,8 @@ static int show_stat(struct seq_file *p, void *v)
29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
30 cputime64_t guest; 30 cputime64_t guest;
31 u64 sum = 0; 31 u64 sum = 0;
32 u64 sum_softirq = 0;
33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
32 struct timespec boottime; 34 struct timespec boottime;
33 unsigned int per_irq_sum; 35 unsigned int per_irq_sum;
34 36
@@ -53,6 +55,13 @@ static int show_stat(struct seq_file *p, void *v)
53 sum += kstat_irqs_cpu(j, i); 55 sum += kstat_irqs_cpu(j, i);
54 } 56 }
55 sum += arch_irq_stat_cpu(i); 57 sum += arch_irq_stat_cpu(i);
58
59 for (j = 0; j < NR_SOFTIRQS; j++) {
60 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
61
62 per_softirq_sums[j] += softirq_stat;
63 sum_softirq += softirq_stat;
64 }
56 } 65 }
57 sum += arch_irq_stat(); 66 sum += arch_irq_stat();
58 67
@@ -115,6 +124,12 @@ static int show_stat(struct seq_file *p, void *v)
115 nr_running(), 124 nr_running(),
116 nr_iowait()); 125 nr_iowait());
117 126
127 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
128
129 for (i = 0; i < NR_SOFTIRQS; i++)
130 seq_printf(p, " %u", per_softirq_sums[i]);
131 seq_printf(p, "\n");
132
118 return 0; 133 return 0;
119} 134}
120 135
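
The new aggregate line in /proc/stat starts with the literal token "softirq", followed by the grand total and then one count per softirq type in NR_SOFTIRQS order. A hedged userspace sketch (not from the patch) that pulls out just the total:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/stat", "r");
	char line[1024];
	unsigned long long total;

	if (!f) {
		perror("/proc/stat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "softirq ", 8) == 0 &&
		    sscanf(line + 8, "%llu", &total) == 1)
			printf("total softirq events: %llu\n", total);
	}
	fclose(f);
	return 0;
}
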
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5edcc3f92ba7..0872afa58d39 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -166,12 +166,7 @@ static const struct file_operations proc_vmcore_operations = {
166 166
167static struct vmcore* __init get_new_element(void) 167static struct vmcore* __init get_new_element(void)
168{ 168{
169 struct vmcore *p; 169 return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
170
171 p = kmalloc(sizeof(*p), GFP_KERNEL);
172 if (p)
173 memset(p, 0, sizeof(*p));
174 return p;
175} 170}
176 171
177static u64 __init get_vmcore_size_elf64(char *elfptr) 172static u64 __init get_vmcore_size_elf64(char *elfptr)
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 4beb964a2a3e..128d3f7c8aa5 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1270,9 +1270,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1270 1270
1271 RFALSE(ih, "PAP-12210: ih must be 0"); 1271 RFALSE(ih, "PAP-12210: ih must be 0");
1272 1272
1273 if (is_direntry_le_ih 1273 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
1274 (aux_ih = 1274 if (is_direntry_le_ih(aux_ih)) {
1275 B_N_PITEM_HEAD(tbS0, item_pos))) {
1276 /* we append to directory item */ 1275 /* we append to directory item */
1277 1276
1278 int entry_count; 1277 int entry_count;
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 381750a155f6..03d85cbf90bf 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -390,7 +390,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
390 390
391 if (last_first == FIRST_TO_LAST) { 391 if (last_first == FIRST_TO_LAST) {
392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
393 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 393 ih = B_N_PITEM_HEAD(src, item_num);
394 if (is_direntry_le_ih(ih))
394 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 395 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
395 item_num, 0, cpy_bytes); 396 item_num, 0, cpy_bytes);
396 else { 397 else {
@@ -418,7 +419,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
418 } 419 }
419 } else { 420 } else {
420 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 421 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
421 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 422 ih = B_N_PITEM_HEAD(src, item_num);
423 if (is_direntry_le_ih(ih))
422 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 424 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
423 item_num, 425 item_num,
424 I_ENTRY_COUNT(ih) - cpy_bytes, 426 I_ENTRY_COUNT(ih) - cpy_bytes,
@@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
774 leaf_delete_items_entirely(cur_bi, first + 1, 776 leaf_delete_items_entirely(cur_bi, first + 1,
775 del_num - 1); 777 del_num - 1);
776 778
777 if (is_direntry_le_ih 779 ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
778 (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) 780 if (is_direntry_le_ih(ih))
779 /* the last item is directory */ 781 /* the last item is directory */
780 /* len = numbers of directory entries in this item */ 782 /* len = numbers of directory entries in this item */
781 len = ih_entry_count(ih); 783 len = ih_entry_count(ih);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 7f40f30c55c5..6c959275f2d0 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -640,6 +640,26 @@ int seq_puts(struct seq_file *m, const char *s)
640} 640}
641EXPORT_SYMBOL(seq_puts); 641EXPORT_SYMBOL(seq_puts);
642 642
643/**
644 * seq_write - write arbitrary data to buffer
645 * @seq: seq_file identifying the buffer to which data should be written
646 * @data: data address
647 * @len: number of bytes
648 *
649 * Return 0 on success, non-zero otherwise.
650 */
651int seq_write(struct seq_file *seq, const void *data, size_t len)
652{
653 if (seq->count + len < seq->size) {
654 memcpy(seq->buf + seq->count, data, len);
655 seq->count += len;
656 return 0;
657 }
658 seq->count = seq->size;
659 return -1;
660}
661EXPORT_SYMBOL(seq_write);
662
643struct list_head *seq_list_start(struct list_head *head, loff_t pos) 663struct list_head *seq_list_start(struct list_head *head, loff_t pos)
644{ 664{
645 struct list_head *lh; 665 struct list_head *lh;
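
Unlike seq_printf() and seq_puts(), the new seq_write() helper copies raw bytes with no format parsing and no NUL terminator. A hedged in-kernel sketch (not part of the patch; the ->show() method and its data are hypothetical) of how it might be used:

#include <linux/seq_file.h>

/* Hypothetical ->show() method, for illustration only. */
static int example_show(struct seq_file *m, void *v)
{
	static const unsigned char raw[4] = { 0xde, 0xad, 0xbe, 0xef };

	/* seq_write() copies the bytes verbatim.  On overflow it marks the
	 * buffer full and returns non-zero; the seq_file core then retries
	 * ->show() with a larger buffer, so the return value can be ignored
	 * here. */
	seq_write(m, raw, sizeof(raw));
	return 0;
}
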
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 3d2512c21f05..7cf33379fd46 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -56,9 +56,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
56 56
57 57
58 UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); 58 UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks);
59 if (i_block < 0) { 59 if (i_block < direct_blocks) {
60 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
61 } else if (i_block < direct_blocks) {
62 offsets[n++] = i_block; 60 offsets[n++] = i_block;
63 } else if ((i_block -= direct_blocks) < indirect_blocks) { 61 } else if ((i_block -= direct_blocks) < indirect_blocks) {
64 offsets[n++] = UFS_IND_BLOCK; 62 offsets[n++] = UFS_IND_BLOCK;
@@ -440,8 +438,6 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
440 lock_kernel(); 438 lock_kernel();
441 439
442 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 440 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
443 if (fragment < 0)
444 goto abort_negative;
445 if (fragment > 441 if (fragment >
446 ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) 442 ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb)
447 << uspi->s_fpbshift)) 443 << uspi->s_fpbshift))
@@ -504,10 +500,6 @@ abort:
504 unlock_kernel(); 500 unlock_kernel();
505 return err; 501 return err;
506 502
507abort_negative:
508 ufs_warning(sb, "ufs_get_block", "block < 0");
509 goto abort;
510
511abort_too_big: 503abort_too_big:
512 ufs_warning(sb, "ufs_get_block", "block > big"); 504 ufs_warning(sb, "ufs_get_block", "block > big");
513 goto abort; 505 goto abort;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index f65a53f8752f..6127e24062d0 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -24,7 +24,7 @@
24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. 24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. 25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
26 */ 26 */
27#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 27#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
28# define XFS_BIG_BLKNOS 1 28# define XFS_BIG_BLKNOS 1
29# define XFS_BIG_INUMS 1 29# define XFS_BIG_INUMS 1
30#else 30#else
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2e09efbca8db..a220d36f789b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -616,7 +616,7 @@ xfs_max_file_offset(
616 */ 616 */
617 617
618#if BITS_PER_LONG == 32 618#if BITS_PER_LONG == 32
619# if defined(CONFIG_LBD) 619# if defined(CONFIG_LBDAF)
620 ASSERT(sizeof(sector_t) == 8); 620 ASSERT(sizeof(sector_t) == 8);
621 pagefactor = PAGE_CACHE_SIZE; 621 pagefactor = PAGE_CACHE_SIZE;
622 bitshift = BITS_PER_LONG; 622 bitshift = BITS_PER_LONG;