aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2014-05-07 07:15:46 -0400
committer Ingo Molnar <mingo@kernel.org> 2014-05-07 07:15:46 -0400
commit 2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc (patch)
tree 9478e8cf470c1d5bdb2d89b57a7e35919ab95e72 /fs/ocfs2
parent 08f8aeb55d7727d644dbbbbfb798fe937d47751d (diff)
parent 2b4cfe64dee0d84506b951d81bf55d9891744d25 (diff)
Merge branch 'sched/urgent' into sched/core, to avoid conflicts
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- fs/ocfs2/acl.c 1
-rw-r--r-- fs/ocfs2/alloc.c 3
-rw-r--r-- fs/ocfs2/aops.c 7
-rw-r--r-- fs/ocfs2/aops.h 5
-rw-r--r-- fs/ocfs2/buffer_head_io.c 2
-rw-r--r-- fs/ocfs2/cluster/sys.c 2
-rw-r--r-- fs/ocfs2/cluster/tcp.c 90
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 2
-rw-r--r-- fs/ocfs2/dcache.c 61
-rw-r--r-- fs/ocfs2/dcache.h 12
-rw-r--r-- fs/ocfs2/dir.c 6
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c 27
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 29
-rw-r--r-- fs/ocfs2/dlmglue.c 44
-rw-r--r-- fs/ocfs2/dlmglue.h 3
-rw-r--r-- fs/ocfs2/file.c 78
-rw-r--r-- fs/ocfs2/inode.c 61
-rw-r--r-- fs/ocfs2/inode.h 17
-rw-r--r-- fs/ocfs2/ioctl.c 5
-rw-r--r-- fs/ocfs2/journal.c 6
-rw-r--r-- fs/ocfs2/journal.h 11
-rw-r--r-- fs/ocfs2/locks.c 2
-rw-r--r-- fs/ocfs2/move_extents.c 7
-rw-r--r-- fs/ocfs2/namei.c 8
-rw-r--r-- fs/ocfs2/ocfs2.h 33
-rw-r--r-- fs/ocfs2/quota.h 2
-rw-r--r-- fs/ocfs2/quota_global.c 35
-rw-r--r-- fs/ocfs2/stackglue.c 22
-rw-r--r-- fs/ocfs2/suballoc.c 29
-rw-r--r-- fs/ocfs2/suballoc.h 4
-rw-r--r-- fs/ocfs2/super.c 55
-rw-r--r-- fs/ocfs2/sysfile.c 3
-rw-r--r-- fs/ocfs2/xattr.c 35
33 files changed, 394 insertions, 313 deletions
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 555f4cddefe3..7e8282dcea2a 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -205,6 +205,7 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
205 di->i_mode = cpu_to_le16(inode->i_mode); 205 di->i_mode = cpu_to_le16(inode->i_mode);
206 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 206 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
207 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 207 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
208 ocfs2_update_inode_fsync_trans(handle, inode, 0);
208 209
209 ocfs2_journal_dirty(handle, di_bh); 210 ocfs2_journal_dirty(handle, di_bh);
210 211
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e2edff38be52..b4deb5f750d9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5728,6 +5728,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
5728 } 5728 }
5729 5729
5730 ocfs2_et_update_clusters(et, -len); 5730 ocfs2_et_update_clusters(et, -len);
5731 ocfs2_update_inode_fsync_trans(handle, inode, 1);
5731 5732
5732 ocfs2_journal_dirty(handle, et->et_root_bh); 5733 ocfs2_journal_dirty(handle, et->et_root_bh);
5733 5734
@@ -6932,6 +6933,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6932 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 6933 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
6933 spin_unlock(&oi->ip_lock); 6934 spin_unlock(&oi->ip_lock);
6934 6935
6936 ocfs2_update_inode_fsync_trans(handle, inode, 1);
6935 ocfs2_dinode_new_extent_list(inode, di); 6937 ocfs2_dinode_new_extent_list(inode, di);
6936 6938
6937 ocfs2_journal_dirty(handle, di_bh); 6939 ocfs2_journal_dirty(handle, di_bh);
@@ -7208,6 +7210,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
7208 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 7210 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
7209 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 7211 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
7210 7212
7213 ocfs2_update_inode_fsync_trans(handle, inode, 1);
7211 ocfs2_journal_dirty(handle, di_bh); 7214 ocfs2_journal_dirty(handle, di_bh);
7212 7215
7213out_commit: 7216out_commit:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index aeb44e879c51..d310d12a9adc 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -571,7 +571,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
571{ 571{
572 struct inode *inode = file_inode(iocb->ki_filp); 572 struct inode *inode = file_inode(iocb->ki_filp);
573 int level; 573 int level;
574 wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
575 574
576 /* this io's submitter should not have unlocked this before we could */ 575 /* this io's submitter should not have unlocked this before we could */
577 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 576 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -582,10 +581,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
582 if (ocfs2_iocb_is_unaligned_aio(iocb)) { 581 if (ocfs2_iocb_is_unaligned_aio(iocb)) {
583 ocfs2_iocb_clear_unaligned_aio(iocb); 582 ocfs2_iocb_clear_unaligned_aio(iocb);
584 583
585 if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) && 584 mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
586 waitqueue_active(wq)) {
587 wake_up_all(wq);
588 }
589 } 585 }
590 586
591 ocfs2_iocb_clear_rw_locked(iocb); 587 ocfs2_iocb_clear_rw_locked(iocb);
@@ -2043,6 +2039,7 @@ out_write_size:
2043 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2039 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2044 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); 2040 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
2045 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 2041 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
2042 ocfs2_update_inode_fsync_trans(handle, inode, 1);
2046 ocfs2_journal_dirty(handle, wc->w_di_bh); 2043 ocfs2_journal_dirty(handle, wc->w_di_bh);
2047 2044
2048 ocfs2_commit_trans(osb, handle); 2045 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index f671e49beb34..6cae155d54df 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -102,9 +102,4 @@ enum ocfs2_iocb_lock_bits {
102#define ocfs2_iocb_is_unaligned_aio(iocb) \ 102#define ocfs2_iocb_is_unaligned_aio(iocb) \
103 test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) 103 test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
104 104
105#define OCFS2_IOEND_WQ_HASH_SZ 37
106#define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\
107 OCFS2_IOEND_WQ_HASH_SZ])
108extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
109
110#endif /* OCFS2_FILE_H */ 105#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 5b704c63a103..1edcb141f639 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
90 * information for this bh as it's not marked locally 90 * information for this bh as it's not marked locally
91 * uptodate. */ 91 * uptodate. */
92 ret = -EIO; 92 ret = -EIO;
93 put_bh(bh);
94 mlog_errno(ret); 93 mlog_errno(ret);
95 } 94 }
96 95
@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
420 419
421 if (!buffer_uptodate(bh)) { 420 if (!buffer_uptodate(bh)) {
422 ret = -EIO; 421 ret = -EIO;
423 put_bh(bh);
424 mlog_errno(ret); 422 mlog_errno(ret);
425 } 423 }
426 424
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index a4b07730b2e1..b7f57271d49c 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); 41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION);
42} 42}
43static struct kobj_attribute attr_version = 43static struct kobj_attribute attr_version =
44 __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); 44 __ATTR(interface_revision, S_IRUGO, version_show, NULL);
45 45
46static struct attribute *o2cb_attrs[] = { 46static struct attribute *o2cb_attrs[] = {
47 &attr_version.attr, 47 &attr_version.attr,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2cd2406b4140..c6b90e670389 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -137,7 +137,7 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
137static void o2net_sc_connect_completed(struct work_struct *work); 137static void o2net_sc_connect_completed(struct work_struct *work);
138static void o2net_rx_until_empty(struct work_struct *work); 138static void o2net_rx_until_empty(struct work_struct *work);
139static void o2net_shutdown_sc(struct work_struct *work); 139static void o2net_shutdown_sc(struct work_struct *work);
140static void o2net_listen_data_ready(struct sock *sk, int bytes); 140static void o2net_listen_data_ready(struct sock *sk);
141static void o2net_sc_send_keep_req(struct work_struct *work); 141static void o2net_sc_send_keep_req(struct work_struct *work);
142static void o2net_idle_timer(unsigned long data); 142static void o2net_idle_timer(unsigned long data);
143static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 143static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
@@ -262,17 +262,17 @@ static void o2net_update_recv_stats(struct o2net_sock_container *sc)
262 262
263#endif /* CONFIG_OCFS2_FS_STATS */ 263#endif /* CONFIG_OCFS2_FS_STATS */
264 264
265static inline int o2net_reconnect_delay(void) 265static inline unsigned int o2net_reconnect_delay(void)
266{ 266{
267 return o2nm_single_cluster->cl_reconnect_delay_ms; 267 return o2nm_single_cluster->cl_reconnect_delay_ms;
268} 268}
269 269
270static inline int o2net_keepalive_delay(void) 270static inline unsigned int o2net_keepalive_delay(void)
271{ 271{
272 return o2nm_single_cluster->cl_keepalive_delay_ms; 272 return o2nm_single_cluster->cl_keepalive_delay_ms;
273} 273}
274 274
275static inline int o2net_idle_timeout(void) 275static inline unsigned int o2net_idle_timeout(void)
276{ 276{
277 return o2nm_single_cluster->cl_idle_timeout_ms; 277 return o2nm_single_cluster->cl_idle_timeout_ms;
278} 278}
@@ -597,9 +597,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
597} 597}
598 598
599/* see o2net_register_callbacks() */ 599/* see o2net_register_callbacks() */
600static void o2net_data_ready(struct sock *sk, int bytes) 600static void o2net_data_ready(struct sock *sk)
601{ 601{
602 void (*ready)(struct sock *sk, int bytes); 602 void (*ready)(struct sock *sk);
603 603
604 read_lock(&sk->sk_callback_lock); 604 read_lock(&sk->sk_callback_lock);
605 if (sk->sk_user_data) { 605 if (sk->sk_user_data) {
@@ -613,7 +613,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
613 } 613 }
614 read_unlock(&sk->sk_callback_lock); 614 read_unlock(&sk->sk_callback_lock);
615 615
616 ready(sk, bytes); 616 ready(sk);
617} 617}
618 618
619/* see o2net_register_callbacks() */ 619/* see o2net_register_callbacks() */
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
916 916
917static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) 917static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
918{ 918{
919 int ret; 919 struct kvec vec = { .iov_len = len, .iov_base = data, };
920 mm_segment_t oldfs; 920 struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
921 struct kvec vec = { 921 return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
922 .iov_len = len,
923 .iov_base = data,
924 };
925 struct msghdr msg = {
926 .msg_iovlen = 1,
927 .msg_iov = (struct iovec *)&vec,
928 .msg_flags = MSG_DONTWAIT,
929 };
930
931 oldfs = get_fs();
932 set_fs(get_ds());
933 ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
934 set_fs(oldfs);
935
936 return ret;
937} 922}
938 923
939static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, 924static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
940 size_t veclen, size_t total) 925 size_t veclen, size_t total)
941{ 926{
942 int ret; 927 int ret;
943 mm_segment_t oldfs; 928 struct msghdr msg;
944 struct msghdr msg = {
945 .msg_iov = (struct iovec *)vec,
946 .msg_iovlen = veclen,
947 };
948 929
949 if (sock == NULL) { 930 if (sock == NULL) {
950 ret = -EINVAL; 931 ret = -EINVAL;
951 goto out; 932 goto out;
952 } 933 }
953 934
954 oldfs = get_fs(); 935 ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
955 set_fs(get_ds()); 936 if (likely(ret == total))
956 ret = sock_sendmsg(sock, &msg, total); 937 return 0;
957 set_fs(oldfs); 938 mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
958 if (ret != total) { 939 if (ret >= 0)
959 mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, 940 ret = -EPIPE; /* should be smarter, I bet */
960 total);
961 if (ret >= 0)
962 ret = -EPIPE; /* should be smarter, I bet */
963 goto out;
964 }
965
966 ret = 0;
967out: 941out:
968 if (ret < 0) 942 mlog(0, "returning error: %d\n", ret);
969 mlog(0, "returning error: %d\n", ret);
970 return ret; 943 return ret;
971} 944}
972 945
@@ -1953,9 +1926,9 @@ static void o2net_accept_many(struct work_struct *work)
1953 cond_resched(); 1926 cond_resched();
1954} 1927}
1955 1928
1956static void o2net_listen_data_ready(struct sock *sk, int bytes) 1929static void o2net_listen_data_ready(struct sock *sk)
1957{ 1930{
1958 void (*ready)(struct sock *sk, int bytes); 1931 void (*ready)(struct sock *sk);
1959 1932
1960 read_lock(&sk->sk_callback_lock); 1933 read_lock(&sk->sk_callback_lock);
1961 ready = sk->sk_user_data; 1934 ready = sk->sk_user_data;
@@ -1964,18 +1937,29 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
1964 goto out; 1937 goto out;
1965 } 1938 }
1966 1939
1967 /* ->sk_data_ready is also called for a newly established child socket 1940 /* This callback may called twice when a new connection
1968 * before it has been accepted and the acceptor has set up their 1941 * is being established as a child socket inherits everything
1969 * data_ready.. we only want to queue listen work for our listening 1942 * from a parent LISTEN socket, including the data_ready cb of
1970 * socket */ 1943 * the parent. This leads to a hazard. In o2net_accept_one()
1944 * we are still initializing the child socket but have not
1945 * changed the inherited data_ready callback yet when
1946 * data starts arriving.
1947 * We avoid this hazard by checking the state.
1948 * For the listening socket, the state will be TCP_LISTEN; for the new
1949 * socket, will be TCP_ESTABLISHED. Also, in this case,
1950 * sk->sk_user_data is not a valid function pointer.
1951 */
1952
1971 if (sk->sk_state == TCP_LISTEN) { 1953 if (sk->sk_state == TCP_LISTEN) {
1972 mlog(ML_TCP, "bytes: %d\n", bytes);
1973 queue_work(o2net_wq, &o2net_listen_work); 1954 queue_work(o2net_wq, &o2net_listen_work);
1955 } else {
1956 ready = NULL;
1974 } 1957 }
1975 1958
1976out: 1959out:
1977 read_unlock(&sk->sk_callback_lock); 1960 read_unlock(&sk->sk_callback_lock);
1978 ready(sk, bytes); 1961 if (ready != NULL)
1962 ready(sk);
1979} 1963}
1980 1964
1981static int o2net_open_listening_sock(__be32 addr, __be16 port) 1965static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4cbcb65784a3..dc024367110a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -165,7 +165,7 @@ struct o2net_sock_container {
165 165
166 /* original handlers for the sockets */ 166 /* original handlers for the sockets */
167 void (*sc_state_change)(struct sock *sk); 167 void (*sc_state_change)(struct sock *sk);
168 void (*sc_data_ready)(struct sock *sk, int bytes); 168 void (*sc_data_ready)(struct sock *sk);
169 169
170 u32 sc_msg_key; 170 u32 sc_msg_key;
171 u16 sc_msg_type; 171 u16 sc_msg_type;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 0d3a97d2d5f6..e2e05a106beb 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -37,7 +37,6 @@
37#include "dlmglue.h" 37#include "dlmglue.h"
38#include "file.h" 38#include "file.h"
39#include "inode.h" 39#include "inode.h"
40#include "super.h"
41#include "ocfs2_trace.h" 40#include "ocfs2_trace.h"
42 41
43void ocfs2_dentry_attach_gen(struct dentry *dentry) 42void ocfs2_dentry_attach_gen(struct dentry *dentry)
@@ -346,52 +345,6 @@ out_attach:
346 return ret; 345 return ret;
347} 346}
348 347
349DEFINE_SPINLOCK(dentry_list_lock);
350
351/* We limit the number of dentry locks to drop in one go. We have
352 * this limit so that we don't starve other users of ocfs2_wq. */
353#define DL_INODE_DROP_COUNT 64
354
355/* Drop inode references from dentry locks */
356static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
357{
358 struct ocfs2_dentry_lock *dl;
359
360 spin_lock(&dentry_list_lock);
361 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
362 dl = osb->dentry_lock_list;
363 osb->dentry_lock_list = dl->dl_next;
364 spin_unlock(&dentry_list_lock);
365 iput(dl->dl_inode);
366 kfree(dl);
367 spin_lock(&dentry_list_lock);
368 }
369 spin_unlock(&dentry_list_lock);
370}
371
372void ocfs2_drop_dl_inodes(struct work_struct *work)
373{
374 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
375 dentry_lock_work);
376
377 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
378 /*
379 * Don't queue dropping if umount is in progress. We flush the
380 * list in ocfs2_dismount_volume
381 */
382 spin_lock(&dentry_list_lock);
383 if (osb->dentry_lock_list &&
384 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
385 queue_work(ocfs2_wq, &osb->dentry_lock_work);
386 spin_unlock(&dentry_list_lock);
387}
388
389/* Flush the whole work queue */
390void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
391{
392 __ocfs2_drop_dl_inodes(osb, -1);
393}
394
395/* 348/*
396 * ocfs2_dentry_iput() and friends. 349 * ocfs2_dentry_iput() and friends.
397 * 350 *
@@ -416,24 +369,16 @@ void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
416static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, 369static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
417 struct ocfs2_dentry_lock *dl) 370 struct ocfs2_dentry_lock *dl)
418{ 371{
372 iput(dl->dl_inode);
419 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); 373 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
420 ocfs2_lock_res_free(&dl->dl_lockres); 374 ocfs2_lock_res_free(&dl->dl_lockres);
421 375 kfree(dl);
422 /* We leave dropping of inode reference to ocfs2_wq as that can
423 * possibly lead to inode deletion which gets tricky */
424 spin_lock(&dentry_list_lock);
425 if (!osb->dentry_lock_list &&
426 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
427 queue_work(ocfs2_wq, &osb->dentry_lock_work);
428 dl->dl_next = osb->dentry_lock_list;
429 osb->dentry_lock_list = dl;
430 spin_unlock(&dentry_list_lock);
431} 376}
432 377
433void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 378void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
434 struct ocfs2_dentry_lock *dl) 379 struct ocfs2_dentry_lock *dl)
435{ 380{
436 int unlock; 381 int unlock = 0;
437 382
438 BUG_ON(dl->dl_count == 0); 383 BUG_ON(dl->dl_count == 0);
439 384
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index b79eff709958..55f58892b153 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,13 +29,8 @@
29extern const struct dentry_operations ocfs2_dentry_ops; 29extern const struct dentry_operations ocfs2_dentry_ops;
30 30
31struct ocfs2_dentry_lock { 31struct ocfs2_dentry_lock {
32 /* Use count of dentry lock */
33 unsigned int dl_count; 32 unsigned int dl_count;
34 union { 33 u64 dl_parent_blkno;
35 /* Linked list of dentry locks to release */
36 struct ocfs2_dentry_lock *dl_next;
37 u64 dl_parent_blkno;
38 };
39 34
40 /* 35 /*
41 * The ocfs2_dentry_lock keeps an inode reference until 36 * The ocfs2_dentry_lock keeps an inode reference until
@@ -49,14 +44,9 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 44int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 45 u64 parent_blkno);
51 46
52extern spinlock_t dentry_list_lock;
53
54void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 47void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
55 struct ocfs2_dentry_lock *dl); 48 struct ocfs2_dentry_lock *dl);
56 49
57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
59
60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
61 int skip_unhashed); 51 int skip_unhashed);
62 52
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 91a7e85ac8fd..0717662b4aef 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2957,6 +2957,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2957 ocfs2_init_dir_trailer(dir, dirdata_bh, i); 2957 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
2958 } 2958 }
2959 2959
2960 ocfs2_update_inode_fsync_trans(handle, dir, 1);
2960 ocfs2_journal_dirty(handle, dirdata_bh); 2961 ocfs2_journal_dirty(handle, dirdata_bh);
2961 2962
2962 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { 2963 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
@@ -3005,6 +3006,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3005 di->i_size = cpu_to_le64(sb->s_blocksize); 3006 di->i_size = cpu_to_le64(sb->s_blocksize);
3006 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); 3007 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
3007 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); 3008 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
3009 ocfs2_update_inode_fsync_trans(handle, dir, 1);
3008 3010
3009 /* 3011 /*
3010 * This should never fail as our extent list is empty and all 3012 * This should never fail as our extent list is empty and all
@@ -3338,6 +3340,7 @@ do_extend:
3338 } else { 3340 } else {
3339 de->rec_len = cpu_to_le16(sb->s_blocksize); 3341 de->rec_len = cpu_to_le16(sb->s_blocksize);
3340 } 3342 }
3343 ocfs2_update_inode_fsync_trans(handle, dir, 1);
3341 ocfs2_journal_dirty(handle, new_bh); 3344 ocfs2_journal_dirty(handle, new_bh);
3342 3345
3343 dir_i_size += dir->i_sb->s_blocksize; 3346 dir_i_size += dir->i_sb->s_blocksize;
@@ -3896,6 +3899,7 @@ out_commit:
3896 dquot_free_space_nodirty(dir, 3899 dquot_free_space_nodirty(dir,
3897 ocfs2_clusters_to_bytes(dir->i_sb, 1)); 3900 ocfs2_clusters_to_bytes(dir->i_sb, 1));
3898 3901
3902 ocfs2_update_inode_fsync_trans(handle, dir, 1);
3899 ocfs2_commit_trans(osb, handle); 3903 ocfs2_commit_trans(osb, handle);
3900 3904
3901out: 3905out:
@@ -4134,6 +4138,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4134 mlog_errno(ret); 4138 mlog_errno(ret);
4135 did_quota = 0; 4139 did_quota = 0;
4136 4140
4141 ocfs2_update_inode_fsync_trans(handle, dir, 1);
4137 ocfs2_journal_dirty(handle, dx_root_bh); 4142 ocfs2_journal_dirty(handle, dx_root_bh);
4138 4143
4139out_commit: 4144out_commit:
@@ -4401,6 +4406,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4401 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4406 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4402 spin_unlock(&OCFS2_I(dir)->ip_lock); 4407 spin_unlock(&OCFS2_I(dir)->ip_lock);
4403 di->i_dx_root = cpu_to_le64(0ULL); 4408 di->i_dx_root = cpu_to_le64(0ULL);
4409 ocfs2_update_inode_fsync_trans(handle, dir, 1);
4404 4410
4405 ocfs2_journal_dirty(handle, di_bh); 4411 ocfs2_journal_dirty(handle, di_bh);
4406 4412
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 33660a4a52fa..c973690dc0bc 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1123,7 +1123,6 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1123 struct dlm_ctxt *dlm = NULL; 1123 struct dlm_ctxt *dlm = NULL;
1124 char *local = NULL; 1124 char *local = NULL;
1125 int status = 0; 1125 int status = 0;
1126 int locked = 0;
1127 1126
1128 qr = (struct dlm_query_region *) msg->buf; 1127 qr = (struct dlm_query_region *) msg->buf;
1129 1128
@@ -1132,10 +1131,8 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1132 1131
1133 /* buffer used in dlm_mast_regions() */ 1132 /* buffer used in dlm_mast_regions() */
1134 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); 1133 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
1135 if (!local) { 1134 if (!local)
1136 status = -ENOMEM; 1135 return -ENOMEM;
1137 goto bail;
1138 }
1139 1136
1140 status = -EINVAL; 1137 status = -EINVAL;
1141 1138
@@ -1144,16 +1141,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1144 if (!dlm) { 1141 if (!dlm) {
1145 mlog(ML_ERROR, "Node %d queried hb regions on domain %s " 1142 mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
1146 "before join domain\n", qr->qr_node, qr->qr_domain); 1143 "before join domain\n", qr->qr_node, qr->qr_domain);
1147 goto bail; 1144 goto out_domain_lock;
1148 } 1145 }
1149 1146
1150 spin_lock(&dlm->spinlock); 1147 spin_lock(&dlm->spinlock);
1151 locked = 1;
1152 if (dlm->joining_node != qr->qr_node) { 1148 if (dlm->joining_node != qr->qr_node) {
1153 mlog(ML_ERROR, "Node %d queried hb regions on domain %s " 1149 mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
1154 "but joining node is %d\n", qr->qr_node, qr->qr_domain, 1150 "but joining node is %d\n", qr->qr_node, qr->qr_domain,
1155 dlm->joining_node); 1151 dlm->joining_node);
1156 goto bail; 1152 goto out_dlm_lock;
1157 } 1153 }
1158 1154
1159 /* Support for global heartbeat was added in 1.1 */ 1155 /* Support for global heartbeat was added in 1.1 */
@@ -1163,14 +1159,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1163 "but active dlm protocol is %d.%d\n", qr->qr_node, 1159 "but active dlm protocol is %d.%d\n", qr->qr_node,
1164 qr->qr_domain, dlm->dlm_locking_proto.pv_major, 1160 qr->qr_domain, dlm->dlm_locking_proto.pv_major,
1165 dlm->dlm_locking_proto.pv_minor); 1161 dlm->dlm_locking_proto.pv_minor);
1166 goto bail; 1162 goto out_dlm_lock;
1167 } 1163 }
1168 1164
1169 status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); 1165 status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
1170 1166
1171bail: 1167out_dlm_lock:
1172 if (locked) 1168 spin_unlock(&dlm->spinlock);
1173 spin_unlock(&dlm->spinlock); 1169
1170out_domain_lock:
1174 spin_unlock(&dlm_domain_lock); 1171 spin_unlock(&dlm_domain_lock);
1175 1172
1176 kfree(local); 1173 kfree(local);
@@ -1877,19 +1874,19 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1877 goto bail; 1874 goto bail;
1878 } 1875 }
1879 1876
1880 status = dlm_debug_init(dlm); 1877 status = dlm_launch_thread(dlm);
1881 if (status < 0) { 1878 if (status < 0) {
1882 mlog_errno(status); 1879 mlog_errno(status);
1883 goto bail; 1880 goto bail;
1884 } 1881 }
1885 1882
1886 status = dlm_launch_thread(dlm); 1883 status = dlm_launch_recovery_thread(dlm);
1887 if (status < 0) { 1884 if (status < 0) {
1888 mlog_errno(status); 1885 mlog_errno(status);
1889 goto bail; 1886 goto bail;
1890 } 1887 }
1891 1888
1892 status = dlm_launch_recovery_thread(dlm); 1889 status = dlm_debug_init(dlm);
1893 if (status < 0) { 1890 if (status < 0) {
1894 mlog_errno(status); 1891 mlog_errno(status);
1895 goto bail; 1892 goto bail;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 7035af09cc03..fe29f7978f81 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -537,7 +537,10 @@ master_here:
537 /* success! see if any other nodes need recovery */ 537 /* success! see if any other nodes need recovery */
538 mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", 538 mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
539 dlm->name, dlm->reco.dead_node, dlm->node_num); 539 dlm->name, dlm->reco.dead_node, dlm->node_num);
540 dlm_reset_recovery(dlm); 540 spin_lock(&dlm->spinlock);
541 __dlm_reset_recovery(dlm);
542 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
543 spin_unlock(&dlm->spinlock);
541 } 544 }
542 dlm_end_recovery(dlm); 545 dlm_end_recovery(dlm);
543 546
@@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
695 if (all_nodes_done) { 698 if (all_nodes_done) {
696 int ret; 699 int ret;
697 700
701 /* Set this flag on recovery master to avoid
702 * a new recovery for another dead node start
703 * before the recovery is not done. That may
704 * cause recovery hung.*/
705 spin_lock(&dlm->spinlock);
706 dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
707 spin_unlock(&dlm->spinlock);
708
698 /* all nodes are now in DLM_RECO_NODE_DATA_DONE state 709 /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
699 * just send a finalize message to everyone and 710 * just send a finalize message to everyone and
700 * clean up */ 711 * clean up */
@@ -1750,13 +1761,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1750 struct dlm_migratable_lockres *mres) 1761 struct dlm_migratable_lockres *mres)
1751{ 1762{
1752 struct dlm_migratable_lock *ml; 1763 struct dlm_migratable_lock *ml;
1753 struct list_head *queue; 1764 struct list_head *queue, *iter;
1754 struct list_head *tmpq = NULL; 1765 struct list_head *tmpq = NULL;
1755 struct dlm_lock *newlock = NULL; 1766 struct dlm_lock *newlock = NULL;
1756 struct dlm_lockstatus *lksb = NULL; 1767 struct dlm_lockstatus *lksb = NULL;
1757 int ret = 0; 1768 int ret = 0;
1758 int i, j, bad; 1769 int i, j, bad;
1759 struct dlm_lock *lock = NULL; 1770 struct dlm_lock *lock;
1760 u8 from = O2NM_MAX_NODES; 1771 u8 from = O2NM_MAX_NODES;
1761 unsigned int added = 0; 1772 unsigned int added = 0;
1762 __be64 c; 1773 __be64 c;
@@ -1791,14 +1802,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1791 /* MIGRATION ONLY! */ 1802 /* MIGRATION ONLY! */
1792 BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); 1803 BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
1793 1804
1805 lock = NULL;
1794 spin_lock(&res->spinlock); 1806 spin_lock(&res->spinlock);
1795 for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { 1807 for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
1796 tmpq = dlm_list_idx_to_ptr(res, j); 1808 tmpq = dlm_list_idx_to_ptr(res, j);
1797 list_for_each_entry(lock, tmpq, list) { 1809 list_for_each(iter, tmpq) {
1798 if (lock->ml.cookie != ml->cookie) 1810 lock = list_entry(iter,
1799 lock = NULL; 1811 struct dlm_lock, list);
1800 else 1812 if (lock->ml.cookie == ml->cookie)
1801 break; 1813 break;
1814 lock = NULL;
1802 } 1815 }
1803 if (lock) 1816 if (lock)
1804 break; 1817 break;
@@ -2882,8 +2895,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2882 BUG(); 2895 BUG();
2883 } 2896 }
2884 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; 2897 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2898 __dlm_reset_recovery(dlm);
2885 spin_unlock(&dlm->spinlock); 2899 spin_unlock(&dlm->spinlock);
2886 dlm_reset_recovery(dlm);
2887 dlm_kick_recovery_thread(dlm); 2900 dlm_kick_recovery_thread(dlm);
2888 break; 2901 break;
2889 default: 2902 default:
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 19986959d149..6bd690b5a061 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3144,22 +3144,60 @@ out:
3144 return 0; 3144 return 0;
3145} 3145}
3146 3146
3147static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3148 struct ocfs2_lock_res *lockres);
3149
3147/* Mark the lockres as being dropped. It will no longer be 3150/* Mark the lockres as being dropped. It will no longer be
3148 * queued if blocking, but we still may have to wait on it 3151 * queued if blocking, but we still may have to wait on it
3149 * being dequeued from the downconvert thread before we can consider 3152 * being dequeued from the downconvert thread before we can consider
3150 * it safe to drop. 3153 * it safe to drop.
3151 * 3154 *
3152 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3155 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3153void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3156void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
3157 struct ocfs2_lock_res *lockres)
3154{ 3158{
3155 int status; 3159 int status;
3156 struct ocfs2_mask_waiter mw; 3160 struct ocfs2_mask_waiter mw;
3157 unsigned long flags; 3161 unsigned long flags, flags2;
3158 3162
3159 ocfs2_init_mask_waiter(&mw); 3163 ocfs2_init_mask_waiter(&mw);
3160 3164
3161 spin_lock_irqsave(&lockres->l_lock, flags); 3165 spin_lock_irqsave(&lockres->l_lock, flags);
3162 lockres->l_flags |= OCFS2_LOCK_FREEING; 3166 lockres->l_flags |= OCFS2_LOCK_FREEING;
3167 if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
3168 /*
3169 * We know the downconvert is queued but not in progress
3170 * because we are the downconvert thread and processing
3171 * different lock. So we can just remove the lock from the
3172 * queue. This is not only an optimization but also a way
3173 * to avoid the following deadlock:
3174 * ocfs2_dentry_post_unlock()
3175 * ocfs2_dentry_lock_put()
3176 * ocfs2_drop_dentry_lock()
3177 * iput()
3178 * ocfs2_evict_inode()
3179 * ocfs2_clear_inode()
3180 * ocfs2_mark_lockres_freeing()
3181 * ... blocks waiting for OCFS2_LOCK_QUEUED
3182 * since we are the downconvert thread which
3183 * should clear the flag.
3184 */
3185 spin_unlock_irqrestore(&lockres->l_lock, flags);
3186 spin_lock_irqsave(&osb->dc_task_lock, flags2);
3187 list_del_init(&lockres->l_blocked_list);
3188 osb->blocked_lock_count--;
3189 spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
3190 /*
3191 * Warn if we recurse into another post_unlock call. Strictly
3192 * speaking it isn't a problem but we need to be careful if
3193 * that happens (stack overflow, deadlocks, ...) so warn if
3194 * ocfs2 grows a path for which this can happen.
3195 */
3196 WARN_ON_ONCE(lockres->l_ops->post_unlock);
3197 /* Since the lock is freeing we don't do much in the fn below */
3198 ocfs2_process_blocked_lock(osb, lockres);
3199 return;
3200 }
3163 while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3201 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3164 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3202 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3165 spin_unlock_irqrestore(&lockres->l_lock, flags); 3203 spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3180,7 +3218,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
3180{ 3218{
3181 int ret; 3219 int ret;
3182 3220
3183 ocfs2_mark_lockres_freeing(lockres); 3221 ocfs2_mark_lockres_freeing(osb, lockres);
3184 ret = ocfs2_drop_lock(osb, lockres); 3222 ret = ocfs2_drop_lock(osb, lockres);
3185 if (ret) 3223 if (ret)
3186 mlog_errno(ret); 3224 mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d596d8c4a4a..d293a22c32c5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
157void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); 157void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
158 158
159 159
160void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 160void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
161 struct ocfs2_lock_res *lockres);
161void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 162void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
162 struct ocfs2_lock_res *lockres); 163 struct ocfs2_lock_res *lockres);
163 164
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 51632c40e896..8970dcf74de5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -175,9 +175,13 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
175 int datasync) 175 int datasync)
176{ 176{
177 int err = 0; 177 int err = 0;
178 journal_t *journal;
179 struct inode *inode = file->f_mapping->host; 178 struct inode *inode = file->f_mapping->host;
180 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 179 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
180 struct ocfs2_inode_info *oi = OCFS2_I(inode);
181 journal_t *journal = osb->journal->j_journal;
182 int ret;
183 tid_t commit_tid;
184 bool needs_barrier = false;
181 185
182 trace_ocfs2_sync_file(inode, file, file->f_path.dentry, 186 trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
183 OCFS2_I(inode)->ip_blkno, 187 OCFS2_I(inode)->ip_blkno,
@@ -192,29 +196,19 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
192 if (err) 196 if (err)
193 return err; 197 return err;
194 198
195 /* 199 commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
196 * Probably don't need the i_mutex at all in here, just putting it here 200 if (journal->j_flags & JBD2_BARRIER &&
197 * to be consistent with how fsync used to be called, someone more 201 !jbd2_trans_will_send_data_barrier(journal, commit_tid))
198 * familiar with the fs could possibly remove it. 202 needs_barrier = true;
199 */ 203 err = jbd2_complete_transaction(journal, commit_tid);
200 mutex_lock(&inode->i_mutex); 204 if (needs_barrier) {
201 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { 205 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
202 /* 206 if (!err)
203 * We still have to flush drive's caches to get data to the 207 err = ret;
204 * platter
205 */
206 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
207 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
208 goto bail;
209 } 208 }
210 209
211 journal = osb->journal->j_journal;
212 err = jbd2_journal_force_commit(journal);
213
214bail:
215 if (err) 210 if (err)
216 mlog_errno(err); 211 mlog_errno(err);
217 mutex_unlock(&inode->i_mutex);
218 212
219 return (err < 0) ? -EIO : 0; 213 return (err < 0) ? -EIO : 0;
220} 214}
@@ -292,6 +286,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
292 inode->i_atime = CURRENT_TIME; 286 inode->i_atime = CURRENT_TIME;
293 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 287 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
294 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 288 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
289 ocfs2_update_inode_fsync_trans(handle, inode, 0);
295 ocfs2_journal_dirty(handle, bh); 290 ocfs2_journal_dirty(handle, bh);
296 291
297out_commit: 292out_commit:
@@ -341,6 +336,7 @@ int ocfs2_simple_size_update(struct inode *inode,
341 if (ret < 0) 336 if (ret < 0)
342 mlog_errno(ret); 337 mlog_errno(ret);
343 338
339 ocfs2_update_inode_fsync_trans(handle, inode, 0);
344 ocfs2_commit_trans(osb, handle); 340 ocfs2_commit_trans(osb, handle);
345out: 341out:
346 return ret; 342 return ret;
@@ -435,6 +431,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
435 di->i_size = cpu_to_le64(new_i_size); 431 di->i_size = cpu_to_le64(new_i_size);
436 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 432 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
437 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 433 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
434 ocfs2_update_inode_fsync_trans(handle, inode, 0);
438 435
439 ocfs2_journal_dirty(handle, fe_bh); 436 ocfs2_journal_dirty(handle, fe_bh);
440 437
@@ -650,7 +647,7 @@ restarted_transaction:
650 mlog_errno(status); 647 mlog_errno(status);
651 goto leave; 648 goto leave;
652 } 649 }
653 650 ocfs2_update_inode_fsync_trans(handle, inode, 1);
654 ocfs2_journal_dirty(handle, bh); 651 ocfs2_journal_dirty(handle, bh);
655 652
656 spin_lock(&OCFS2_I(inode)->ip_lock); 653 spin_lock(&OCFS2_I(inode)->ip_lock);
@@ -743,6 +740,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
743 OCFS2_JOURNAL_ACCESS_WRITE); 740 OCFS2_JOURNAL_ACCESS_WRITE);
744 if (ret) 741 if (ret)
745 mlog_errno(ret); 742 mlog_errno(ret);
743 ocfs2_update_inode_fsync_trans(handle, inode, 1);
746 744
747out: 745out:
748 if (ret) { 746 if (ret) {
@@ -840,6 +838,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
840 di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 838 di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
841 di->i_mtime_nsec = di->i_ctime_nsec; 839 di->i_mtime_nsec = di->i_ctime_nsec;
842 ocfs2_journal_dirty(handle, di_bh); 840 ocfs2_journal_dirty(handle, di_bh);
841 ocfs2_update_inode_fsync_trans(handle, inode, 1);
843 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 842 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
844 } 843 }
845 844
@@ -1344,6 +1343,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1344 1343
1345 di = (struct ocfs2_dinode *) bh->b_data; 1344 di = (struct ocfs2_dinode *) bh->b_data;
1346 di->i_mode = cpu_to_le16(inode->i_mode); 1345 di->i_mode = cpu_to_le16(inode->i_mode);
1346 ocfs2_update_inode_fsync_trans(handle, inode, 0);
1347 1347
1348 ocfs2_journal_dirty(handle, bh); 1348 ocfs2_journal_dirty(handle, bh);
1349 1349
@@ -1576,6 +1576,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1576 if (ret) 1576 if (ret)
1577 mlog_errno(ret); 1577 mlog_errno(ret);
1578 } 1578 }
1579 ocfs2_update_inode_fsync_trans(handle, inode, 1);
1579 1580
1580 ocfs2_commit_trans(osb, handle); 1581 ocfs2_commit_trans(osb, handle);
1581out: 1582out:
@@ -2061,13 +2062,6 @@ out:
2061 return ret; 2062 return ret;
2062} 2063}
2063 2064
2064static void ocfs2_aiodio_wait(struct inode *inode)
2065{
2066 wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
2067
2068 wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0));
2069}
2070
2071static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) 2065static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
2072{ 2066{
2073 int blockmask = inode->i_sb->s_blocksize - 1; 2067 int blockmask = inode->i_sb->s_blocksize - 1;
@@ -2345,10 +2339,8 @@ relock:
2345 * Wait on previous unaligned aio to complete before 2339 * Wait on previous unaligned aio to complete before
2346 * proceeding. 2340 * proceeding.
2347 */ 2341 */
2348 ocfs2_aiodio_wait(inode); 2342 mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
2349 2343 /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
2350 /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */
2351 atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio);
2352 ocfs2_iocb_set_unaligned_aio(iocb); 2344 ocfs2_iocb_set_unaligned_aio(iocb);
2353 } 2345 }
2354 2346
@@ -2375,15 +2367,18 @@ relock:
2375 2367
2376 if (direct_io) { 2368 if (direct_io) {
2377 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2369 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2378 ppos, count, ocount); 2370 count, ocount);
2379 if (written < 0) { 2371 if (written < 0) {
2380 ret = written; 2372 ret = written;
2381 goto out_dio; 2373 goto out_dio;
2382 } 2374 }
2383 } else { 2375 } else {
2376 struct iov_iter from;
2377 iov_iter_init(&from, iov, nr_segs, count, 0);
2384 current->backing_dev_info = file->f_mapping->backing_dev_info; 2378 current->backing_dev_info = file->f_mapping->backing_dev_info;
2385 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, 2379 written = generic_perform_write(file, &from, *ppos);
2386 ppos, count, 0); 2380 if (likely(written >= 0))
2381 iocb->ki_pos = *ppos + written;
2387 current->backing_dev_info = NULL; 2382 current->backing_dev_info = NULL;
2388 } 2383 }
2389 2384
@@ -2428,7 +2423,7 @@ out_dio:
2428 2423
2429 if (unaligned_dio) { 2424 if (unaligned_dio) {
2430 ocfs2_iocb_clear_unaligned_aio(iocb); 2425 ocfs2_iocb_clear_unaligned_aio(iocb);
2431 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); 2426 mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
2432 } 2427 }
2433 2428
2434out: 2429out:
@@ -2645,7 +2640,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
2645 case SEEK_SET: 2640 case SEEK_SET:
2646 break; 2641 break;
2647 case SEEK_END: 2642 case SEEK_END:
2648 offset += inode->i_size; 2643 /* SEEK_END requires the OCFS2 inode lock for the file
2644 * because it references the file's size.
2645 */
2646 ret = ocfs2_inode_lock(inode, NULL, 0);
2647 if (ret < 0) {
2648 mlog_errno(ret);
2649 goto out;
2650 }
2651 offset += i_size_read(inode);
2652 ocfs2_inode_unlock(inode, 0);
2649 break; 2653 break;
2650 case SEEK_CUR: 2654 case SEEK_CUR:
2651 if (offset == 0) { 2655 if (offset == 0) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f29a90fde619..437de7f768c6 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
130 struct inode *inode = NULL; 130 struct inode *inode = NULL;
131 struct super_block *sb = osb->sb; 131 struct super_block *sb = osb->sb;
132 struct ocfs2_find_inode_args args; 132 struct ocfs2_find_inode_args args;
133 journal_t *journal = OCFS2_SB(sb)->journal->j_journal;
133 134
134 trace_ocfs2_iget_begin((unsigned long long)blkno, flags, 135 trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
135 sysfile_type); 136 sysfile_type);
@@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
169 goto bail; 170 goto bail;
170 } 171 }
171 172
173 /*
174 * Set transaction id's of transactions that have to be committed
175 * to finish f[data]sync. We set them to currently running transaction
176 * as we cannot be sure that the inode or some of its metadata isn't
177 * part of the transaction - the inode could have been reclaimed and
178 * now it is reread from disk.
179 */
180 if (journal) {
181 transaction_t *transaction;
182 tid_t tid;
183 struct ocfs2_inode_info *oi = OCFS2_I(inode);
184
185 read_lock(&journal->j_state_lock);
186 if (journal->j_running_transaction)
187 transaction = journal->j_running_transaction;
188 else
189 transaction = journal->j_committing_transaction;
190 if (transaction)
191 tid = transaction->t_tid;
192 else
193 tid = journal->j_commit_sequence;
194 read_unlock(&journal->j_state_lock);
195 oi->i_sync_tid = tid;
196 oi->i_datasync_tid = tid;
197 }
198
172bail: 199bail:
173 if (!IS_ERR(inode)) { 200 if (!IS_ERR(inode)) {
174 trace_ocfs2_iget_end(inode, 201 trace_ocfs2_iget_end(inode,
@@ -804,11 +831,13 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
804 goto bail; 831 goto bail;
805 } 832 }
806 833
807 /* If we're coming from downconvert_thread we can't go into our own 834 /*
808 * voting [hello, deadlock city!], so unforuntately we just 835 * If we're coming from downconvert_thread we can't go into our own
809 * have to skip deleting this guy. That's OK though because 836 * voting [hello, deadlock city!] so we cannot delete the inode. But
810 * the node who's doing the actual deleting should handle it 837 * since we dropped last inode ref when downconverting dentry lock,
811 * anyway. */ 838 * we cannot have the file open and thus the node doing unlink will
839 * take care of deleting the inode.
840 */
812 if (current == osb->dc_task) 841 if (current == osb->dc_task)
813 goto bail; 842 goto bail;
814 843
@@ -822,12 +851,6 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
822 goto bail_unlock; 851 goto bail_unlock;
823 } 852 }
824 853
825 /* If we have allowd wipe of this inode for another node, it
826 * will be marked here so we can safely skip it. Recovery will
827 * cleanup any inodes we might inadvertently skip here. */
828 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
829 goto bail_unlock;
830
831 ret = 1; 854 ret = 1;
832bail_unlock: 855bail_unlock:
833 spin_unlock(&oi->ip_lock); 856 spin_unlock(&oi->ip_lock);
@@ -941,7 +964,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
941 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); 964 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
942 if (sync_data) 965 if (sync_data)
943 filemap_write_and_wait(inode->i_mapping); 966 filemap_write_and_wait(inode->i_mapping);
944 truncate_inode_pages(&inode->i_data, 0); 967 truncate_inode_pages_final(&inode->i_data);
945} 968}
946 969
947static void ocfs2_delete_inode(struct inode *inode) 970static void ocfs2_delete_inode(struct inode *inode)
@@ -960,8 +983,6 @@ static void ocfs2_delete_inode(struct inode *inode)
960 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) 983 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno)
961 goto bail; 984 goto bail;
962 985
963 dquot_initialize(inode);
964
965 if (!ocfs2_inode_is_valid_to_delete(inode)) { 986 if (!ocfs2_inode_is_valid_to_delete(inode)) {
966 /* It's probably not necessary to truncate_inode_pages 987 /* It's probably not necessary to truncate_inode_pages
967 * here but we do it for safety anyway (it will most 988 * here but we do it for safety anyway (it will most
@@ -970,6 +991,8 @@ static void ocfs2_delete_inode(struct inode *inode)
970 goto bail; 991 goto bail;
971 } 992 }
972 993
994 dquot_initialize(inode);
995
973 /* We want to block signals in delete_inode as the lock and 996 /* We want to block signals in delete_inode as the lock and
974 * messaging paths may return us -ERESTARTSYS. Which would 997 * messaging paths may return us -ERESTARTSYS. Which would
975 * cause us to exit early, resulting in inodes being orphaned 998 * cause us to exit early, resulting in inodes being orphaned
@@ -1057,6 +1080,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1057{ 1080{
1058 int status; 1081 int status;
1059 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1082 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1083 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1060 1084
1061 clear_inode(inode); 1085 clear_inode(inode);
1062 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, 1086 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
@@ -1073,9 +1097,9 @@ static void ocfs2_clear_inode(struct inode *inode)
1073 1097
1074 /* Do these before all the other work so that we don't bounce 1098 /* Do these before all the other work so that we don't bounce
1075 * the downconvert thread while waiting to destroy the locks. */ 1099 * the downconvert thread while waiting to destroy the locks. */
1076 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); 1100 ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres);
1077 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); 1101 ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
1078 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); 1102 ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
1079 1103
1080 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, 1104 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
1081 &oi->ip_la_data_resv); 1105 &oi->ip_la_data_resv);
@@ -1157,7 +1181,7 @@ void ocfs2_evict_inode(struct inode *inode)
1157 (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { 1181 (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
1158 ocfs2_delete_inode(inode); 1182 ocfs2_delete_inode(inode);
1159 } else { 1183 } else {
1160 truncate_inode_pages(&inode->i_data, 0); 1184 truncate_inode_pages_final(&inode->i_data);
1161 } 1185 }
1162 ocfs2_clear_inode(inode); 1186 ocfs2_clear_inode(inode);
1163} 1187}
@@ -1260,6 +1284,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1260 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1284 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1261 1285
1262 ocfs2_journal_dirty(handle, bh); 1286 ocfs2_journal_dirty(handle, bh);
1287 ocfs2_update_inode_fsync_trans(handle, inode, 1);
1263leave: 1288leave:
1264 return status; 1289 return status;
1265} 1290}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 621fc73bf23d..a6c991c0fc98 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -44,7 +44,7 @@ struct ocfs2_inode_info
44 struct rw_semaphore ip_xattr_sem; 44 struct rw_semaphore ip_xattr_sem;
45 45
46 /* Number of outstanding AIO's which are not page aligned */ 46 /* Number of outstanding AIO's which are not page aligned */
47 atomic_t ip_unaligned_aio; 47 struct mutex ip_unaligned_aio;
48 48
49 /* These fields are protected by ip_lock */ 49 /* These fields are protected by ip_lock */
50 spinlock_t ip_lock; 50 spinlock_t ip_lock;
@@ -73,6 +73,13 @@ struct ocfs2_inode_info
73 u32 ip_dir_lock_gen; 73 u32 ip_dir_lock_gen;
74 74
75 struct ocfs2_alloc_reservation ip_la_data_resv; 75 struct ocfs2_alloc_reservation ip_la_data_resv;
76
77 /*
78 * Transactions that contain inode's metadata needed to complete
79 * fsync and fdatasync, respectively.
80 */
81 tid_t i_sync_tid;
82 tid_t i_datasync_tid;
76}; 83};
77 84
78/* 85/*
@@ -84,8 +91,6 @@ struct ocfs2_inode_info
84#define OCFS2_INODE_BITMAP 0x00000004 91#define OCFS2_INODE_BITMAP 0x00000004
85/* This inode has been wiped from disk */ 92/* This inode has been wiped from disk */
86#define OCFS2_INODE_DELETED 0x00000008 93#define OCFS2_INODE_DELETED 0x00000008
87/* Another node is deleting, so our delete is a nop */
88#define OCFS2_INODE_SKIP_DELETE 0x00000010
89/* Has the inode been orphaned on another node? 94/* Has the inode been orphaned on another node?
90 * 95 *
91 * This hints to ocfs2_drop_inode that it should clear i_nlink before 96 * This hints to ocfs2_drop_inode that it should clear i_nlink before
@@ -100,11 +105,11 @@ struct ocfs2_inode_info
100 * rely on ocfs2_delete_inode to sort things out under the proper 105 * rely on ocfs2_delete_inode to sort things out under the proper
101 * cluster locks. 106 * cluster locks.
102 */ 107 */
103#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 108#define OCFS2_INODE_MAYBE_ORPHANED 0x00000010
104/* Does someone have the file open O_DIRECT */ 109/* Does someone have the file open O_DIRECT */
105#define OCFS2_INODE_OPEN_DIRECT 0x00000040 110#define OCFS2_INODE_OPEN_DIRECT 0x00000020
106/* Tell the inode wipe code it's not in orphan dir */ 111/* Tell the inode wipe code it's not in orphan dir */
107#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 112#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000040
108 113
109static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 114static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
110{ 115{
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 8ca3c29accbf..490229f43731 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -413,11 +413,12 @@ int ocfs2_info_handle_freeinode(struct inode *inode,
413 } 413 }
414 414
415 status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); 415 status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i);
416 if (status < 0)
417 goto bail;
418 416
419 iput(inode_alloc); 417 iput(inode_alloc);
420 inode_alloc = NULL; 418 inode_alloc = NULL;
419
420 if (status < 0)
421 goto bail;
421 } 422 }
422 423
423 o2info_set_request_filled(&oifi->ifi_req); 424 o2info_set_request_filled(&oifi->ifi_req);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 44fc3e530c3d..03ea9314fecd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2132,12 +2132,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2132 iter = oi->ip_next_orphan; 2132 iter = oi->ip_next_orphan;
2133 2133
2134 spin_lock(&oi->ip_lock); 2134 spin_lock(&oi->ip_lock);
2135 /* The remote delete code may have set these on the
2136 * assumption that the other node would wipe them
2137 * successfully. If they are still in the node's
2138 * orphan dir, we need to reset that state. */
2139 oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
2140
2141 /* Set the proper information to get us going into 2135 /* Set the proper information to get us going into
2142 * ocfs2_delete_inode. */ 2136 * ocfs2_delete_inode. */
2143 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 2137 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 9ff4e8cf9d97..7f8cde94abfe 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -626,4 +626,15 @@ static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
626 new_size); 626 new_size);
627} 627}
628 628
629static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
630 struct inode *inode,
631 int datasync)
632{
633 struct ocfs2_inode_info *oi = OCFS2_I(inode);
634
635 oi->i_sync_tid = handle->h_transaction->t_tid;
636 if (datasync)
637 oi->i_datasync_tid = handle->h_transaction->t_tid;
638}
639
629#endif /* OCFS2_JOURNAL_H */ 640#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index e57c804069ea..6b6d092b0998 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -82,6 +82,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
82 } 82 }
83 83
84 ret = flock_lock_file_wait(file, fl); 84 ret = flock_lock_file_wait(file, fl);
85 if (ret)
86 ocfs2_file_unlock(file);
85 87
86out: 88out:
87 mutex_unlock(&fp->fp_mutex); 89 mutex_unlock(&fp->fp_mutex);
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 64c304d668f0..599eb4c4c8be 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -151,6 +151,7 @@ static int __ocfs2_move_extent(handle_t *handle,
151 old_blkno, len); 151 old_blkno, len);
152 } 152 }
153 153
154 ocfs2_update_inode_fsync_trans(handle, inode, 0);
154out: 155out:
155 ocfs2_free_path(path); 156 ocfs2_free_path(path);
156 return ret; 157 return ret;
@@ -690,8 +691,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
690 691
691 ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, 692 ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
692 goal_bit, len); 693 goal_bit, len);
693 if (ret) 694 if (ret) {
695 ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
696 le16_to_cpu(gd->bg_chain));
694 mlog_errno(ret); 697 mlog_errno(ret);
698 }
695 699
696 /* 700 /*
697 * Here we should write the new page out first if we are 701 * Here we should write the new page out first if we are
@@ -957,6 +961,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
957 inode->i_ctime = CURRENT_TIME; 961 inode->i_ctime = CURRENT_TIME;
958 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 962 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
959 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 963 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
964 ocfs2_update_inode_fsync_trans(handle, inode, 0);
960 965
961 ocfs2_journal_dirty(handle, di_bh); 966 ocfs2_journal_dirty(handle, di_bh);
962 967
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3683643f3f0e..2060fc398445 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -450,7 +450,6 @@ leave:
450 450
451 brelse(new_fe_bh); 451 brelse(new_fe_bh);
452 brelse(parent_fe_bh); 452 brelse(parent_fe_bh);
453 kfree(si.name);
454 kfree(si.value); 453 kfree(si.value);
455 454
456 ocfs2_free_dir_lookup_result(&lookup); 455 ocfs2_free_dir_lookup_result(&lookup);
@@ -495,6 +494,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
495 struct ocfs2_dinode *fe = NULL; 494 struct ocfs2_dinode *fe = NULL;
496 struct ocfs2_extent_list *fel; 495 struct ocfs2_extent_list *fel;
497 u16 feat; 496 u16 feat;
497 struct ocfs2_inode_info *oi = OCFS2_I(inode);
498 498
499 *new_fe_bh = NULL; 499 *new_fe_bh = NULL;
500 500
@@ -576,8 +576,8 @@ static int __ocfs2_mknod_locked(struct inode *dir,
576 mlog_errno(status); 576 mlog_errno(status);
577 } 577 }
578 578
579 status = 0; /* error in ocfs2_create_new_inode_locks is not 579 oi->i_sync_tid = handle->h_transaction->t_tid;
580 * critical */ 580 oi->i_datasync_tid = handle->h_transaction->t_tid;
581 581
582leave: 582leave:
583 if (status < 0) { 583 if (status < 0) {
@@ -1855,7 +1855,6 @@ bail:
1855 1855
1856 brelse(new_fe_bh); 1856 brelse(new_fe_bh);
1857 brelse(parent_fe_bh); 1857 brelse(parent_fe_bh);
1858 kfree(si.name);
1859 kfree(si.value); 1858 kfree(si.value);
1860 ocfs2_free_dir_lookup_result(&lookup); 1859 ocfs2_free_dir_lookup_result(&lookup);
1861 if (inode_ac) 1860 if (inode_ac)
@@ -2481,6 +2480,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2481 di->i_orphaned_slot = 0; 2480 di->i_orphaned_slot = 0;
2482 set_nlink(inode, 1); 2481 set_nlink(inode, 1);
2483 ocfs2_set_links_count(di, inode->i_nlink); 2482 ocfs2_set_links_count(di, inode->i_nlink);
2483 ocfs2_update_inode_fsync_trans(handle, inode, 1);
2484 ocfs2_journal_dirty(handle, di_bh); 2484 ocfs2_journal_dirty(handle, di_bh);
2485 2485
2486 status = ocfs2_add_entry(handle, dentry, inode, 2486 status = ocfs2_add_entry(handle, dentry, inode,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 553f53cc73ae..8d64a97a9d5e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -30,6 +30,7 @@
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/wait.h> 31#include <linux/wait.h>
32#include <linux/list.h> 32#include <linux/list.h>
33#include <linux/llist.h>
33#include <linux/rbtree.h> 34#include <linux/rbtree.h>
34#include <linux/workqueue.h> 35#include <linux/workqueue.h>
35#include <linux/kref.h> 36#include <linux/kref.h>
@@ -274,19 +275,16 @@ enum ocfs2_mount_options
274 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ 275 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
275}; 276};
276 277
277#define OCFS2_OSB_SOFT_RO 0x0001 278#define OCFS2_OSB_SOFT_RO 0x0001
278#define OCFS2_OSB_HARD_RO 0x0002 279#define OCFS2_OSB_HARD_RO 0x0002
279#define OCFS2_OSB_ERROR_FS 0x0004 280#define OCFS2_OSB_ERROR_FS 0x0004
280#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 281#define OCFS2_DEFAULT_ATIME_QUANTUM 60
281
282#define OCFS2_DEFAULT_ATIME_QUANTUM 60
283 282
284struct ocfs2_journal; 283struct ocfs2_journal;
285struct ocfs2_slot_info; 284struct ocfs2_slot_info;
286struct ocfs2_recovery_map; 285struct ocfs2_recovery_map;
287struct ocfs2_replay_map; 286struct ocfs2_replay_map;
288struct ocfs2_quota_recovery; 287struct ocfs2_quota_recovery;
289struct ocfs2_dentry_lock;
290struct ocfs2_super 288struct ocfs2_super
291{ 289{
292 struct task_struct *commit_task; 290 struct task_struct *commit_task;
@@ -414,10 +412,9 @@ struct ocfs2_super
414 struct list_head blocked_lock_list; 412 struct list_head blocked_lock_list;
415 unsigned long blocked_lock_count; 413 unsigned long blocked_lock_count;
416 414
417 /* List of dentry locks to release. Anyone can add locks to 415 /* List of dquot structures to drop last reference to */
418 * the list, ocfs2_wq processes the list */ 416 struct llist_head dquot_drop_list;
419 struct ocfs2_dentry_lock *dentry_lock_list; 417 struct work_struct dquot_drop_work;
420 struct work_struct dentry_lock_work;
421 418
422 wait_queue_head_t osb_mount_event; 419 wait_queue_head_t osb_mount_event;
423 420
@@ -449,6 +446,8 @@ struct ocfs2_super
449 /* rb tree root for refcount lock. */ 446 /* rb tree root for refcount lock. */
450 struct rb_root osb_rf_lock_tree; 447 struct rb_root osb_rf_lock_tree;
451 struct ocfs2_refcount_tree *osb_ref_tree_lru; 448 struct ocfs2_refcount_tree *osb_ref_tree_lru;
449
450 struct mutex system_file_mutex;
452}; 451};
453 452
454#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 453#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
@@ -579,18 +578,6 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
579 spin_unlock(&osb->osb_lock); 578 spin_unlock(&osb->osb_lock);
580} 579}
581 580
582
583static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
584 unsigned long flag)
585{
586 unsigned long ret;
587
588 spin_lock(&osb->osb_lock);
589 ret = osb->osb_flags & flag;
590 spin_unlock(&osb->osb_lock);
591 return ret;
592}
593
594static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 581static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
595 int hard) 582 int hard)
596{ 583{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index d5ab56cbe5c5..f266d67df3c6 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -28,6 +28,7 @@ struct ocfs2_dquot {
28 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ 28 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
29 s64 dq_origspace; /* Last globally synced space usage */ 29 s64 dq_origspace; /* Last globally synced space usage */
30 s64 dq_originodes; /* Last globally synced inode usage */ 30 s64 dq_originodes; /* Last globally synced inode usage */
31 struct llist_node list; /* Member of list of dquots to drop */
31}; 32};
32 33
33/* Description of one chunk to recover in memory */ 34/* Description of one chunk to recover in memory */
@@ -110,6 +111,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
110int ocfs2_create_local_dquot(struct dquot *dquot); 111int ocfs2_create_local_dquot(struct dquot *dquot);
111int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); 112int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot);
112int ocfs2_local_write_dquot(struct dquot *dquot); 113int ocfs2_local_write_dquot(struct dquot *dquot);
114void ocfs2_drop_dquot_refs(struct work_struct *work);
113 115
114extern const struct dquot_operations ocfs2_quota_operations; 116extern const struct dquot_operations ocfs2_quota_operations;
115extern struct quota_format_type ocfs2_quota_format; 117extern struct quota_format_type ocfs2_quota_format;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index d7b5108789e2..b990a62cff50 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -10,6 +10,7 @@
10#include <linux/jiffies.h> 10#include <linux/jiffies.h>
11#include <linux/writeback.h> 11#include <linux/writeback.h>
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13#include <linux/llist.h>
13 14
14#include <cluster/masklog.h> 15#include <cluster/masklog.h>
15 16
@@ -679,6 +680,27 @@ static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
679 OCFS2_INODE_UPDATE_CREDITS; 680 OCFS2_INODE_UPDATE_CREDITS;
680} 681}
681 682
683void ocfs2_drop_dquot_refs(struct work_struct *work)
684{
685 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
686 dquot_drop_work);
687 struct llist_node *list;
688 struct ocfs2_dquot *odquot, *next_odquot;
689
690 list = llist_del_all(&osb->dquot_drop_list);
691 llist_for_each_entry_safe(odquot, next_odquot, list, list) {
692 /* Drop the reference we acquired in ocfs2_dquot_release() */
693 dqput(&odquot->dq_dquot);
694 }
695}
696
697/*
698 * Called when the last reference to dquot is dropped. If we are called from
699 * downconvert thread, we cannot do all the handling here because grabbing
700 * quota lock could deadlock (the node holding the quota lock could need some
701 * other cluster lock to proceed but with blocked downconvert thread we cannot
702 * release any lock).
703 */
682static int ocfs2_release_dquot(struct dquot *dquot) 704static int ocfs2_release_dquot(struct dquot *dquot)
683{ 705{
684 handle_t *handle; 706 handle_t *handle;
@@ -694,6 +716,19 @@ static int ocfs2_release_dquot(struct dquot *dquot)
694 /* Check whether we are not racing with some other dqget() */ 716 /* Check whether we are not racing with some other dqget() */
695 if (atomic_read(&dquot->dq_count) > 1) 717 if (atomic_read(&dquot->dq_count) > 1)
696 goto out; 718 goto out;
719 /* Running from downconvert thread? Postpone quota processing to wq */
720 if (current == osb->dc_task) {
721 /*
722 * Grab our own reference to dquot and queue it for delayed
723 * dropping. Quota code rechecks after calling
724 * ->release_dquot() and won't free dquot structure.
725 */
726 dqgrab(dquot);
727 /* First entry on list -> queue work */
728 if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list))
729 queue_work(ocfs2_wq, &osb->dquot_drop_work);
730 goto out;
731 }
697 status = ocfs2_lock_global_qf(oinfo, 1); 732 status = ocfs2_lock_global_qf(oinfo, 1);
698 if (status < 0) 733 if (status < 0)
699 goto out; 734 goto out;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index ca5ce14cbddc..83f1a665ae97 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -496,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
496} 496}
497 497
498static struct kobj_attribute ocfs2_attr_max_locking_protocol = 498static struct kobj_attribute ocfs2_attr_max_locking_protocol =
499 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 499 __ATTR(max_locking_protocol, S_IRUGO,
500 ocfs2_max_locking_protocol_show, NULL); 500 ocfs2_max_locking_protocol_show, NULL);
501 501
502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
@@ -528,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
528} 528}
529 529
530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
531 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 531 __ATTR(loaded_cluster_plugins, S_IRUGO,
532 ocfs2_loaded_cluster_plugins_show, NULL); 532 ocfs2_loaded_cluster_plugins_show, NULL);
533 533
534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
@@ -550,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
550} 550}
551 551
552static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 552static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
553 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 553 __ATTR(active_cluster_plugin, S_IRUGO,
554 ocfs2_active_cluster_plugin_show, NULL); 554 ocfs2_active_cluster_plugin_show, NULL);
555 555
556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
@@ -599,15 +599,29 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
599 599
600 600
601static struct kobj_attribute ocfs2_attr_cluster_stack = 601static struct kobj_attribute ocfs2_attr_cluster_stack =
602 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 602 __ATTR(cluster_stack, S_IRUGO | S_IWUSR,
603 ocfs2_cluster_stack_show, 603 ocfs2_cluster_stack_show,
604 ocfs2_cluster_stack_store); 604 ocfs2_cluster_stack_store);
605 605
606
607
608static ssize_t ocfs2_dlm_recover_show(struct kobject *kobj,
609 struct kobj_attribute *attr,
610 char *buf)
611{
612 return snprintf(buf, PAGE_SIZE, "1\n");
613}
614
615static struct kobj_attribute ocfs2_attr_dlm_recover_support =
616 __ATTR(dlm_recover_callback_support, S_IRUGO,
617 ocfs2_dlm_recover_show, NULL);
618
606static struct attribute *ocfs2_attrs[] = { 619static struct attribute *ocfs2_attrs[] = {
607 &ocfs2_attr_max_locking_protocol.attr, 620 &ocfs2_attr_max_locking_protocol.attr,
608 &ocfs2_attr_loaded_cluster_plugins.attr, 621 &ocfs2_attr_loaded_cluster_plugins.attr,
609 &ocfs2_attr_active_cluster_plugin.attr, 622 &ocfs2_attr_active_cluster_plugin.attr,
610 &ocfs2_attr_cluster_stack.attr, 623 &ocfs2_attr_cluster_stack.attr,
624 &ocfs2_attr_dlm_recover_support.attr,
611 NULL, 625 NULL,
612}; 626};
613 627
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 47ae2663a6f5..0cb889a17ae1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -771,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
771 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); 771 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
772 i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); 772 i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
773 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); 773 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
774 ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0);
774 775
775 status = 0; 776 status = 0;
776 777
@@ -1607,6 +1608,21 @@ out:
1607 return ret; 1608 return ret;
1608} 1609}
1609 1610
1611void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
1612 struct buffer_head *di_bh,
1613 u32 num_bits,
1614 u16 chain)
1615{
1616 u32 tmp_used;
1617 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1618 struct ocfs2_chain_list *cl;
1619
1620 cl = (struct ocfs2_chain_list *)&di->id2.i_chain;
1621 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1622 di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits);
1623 le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits);
1624}
1625
1610static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, 1626static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1611 struct ocfs2_extent_rec *rec, 1627 struct ocfs2_extent_rec *rec,
1612 struct ocfs2_chain_list *cl) 1628 struct ocfs2_chain_list *cl)
@@ -1707,8 +1723,12 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1707 1723
1708 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, 1724 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1709 res->sr_bit_offset, res->sr_bits); 1725 res->sr_bit_offset, res->sr_bits);
1710 if (ret < 0) 1726 if (ret < 0) {
1727 ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
1728 res->sr_bits,
1729 le16_to_cpu(gd->bg_chain));
1711 mlog_errno(ret); 1730 mlog_errno(ret);
1731 }
1712 1732
1713out_loc_only: 1733out_loc_only:
1714 *bits_left = le16_to_cpu(gd->bg_free_bits_count); 1734 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
@@ -1838,6 +1858,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1838 res->sr_bit_offset, 1858 res->sr_bit_offset,
1839 res->sr_bits); 1859 res->sr_bits);
1840 if (status < 0) { 1860 if (status < 0) {
1861 ocfs2_rollback_alloc_dinode_counts(alloc_inode,
1862 ac->ac_bh, res->sr_bits, chain);
1841 mlog_errno(status); 1863 mlog_errno(status);
1842 goto bail; 1864 goto bail;
1843 } 1865 }
@@ -2091,7 +2113,7 @@ int ocfs2_find_new_inode_loc(struct inode *dir,
2091 2113
2092 ac->ac_find_loc_priv = res; 2114 ac->ac_find_loc_priv = res;
2093 *fe_blkno = res->sr_blkno; 2115 *fe_blkno = res->sr_blkno;
2094 2116 ocfs2_update_inode_fsync_trans(handle, dir, 0);
2095out: 2117out:
2096 if (handle) 2118 if (handle)
2097 ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); 2119 ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
@@ -2149,6 +2171,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2149 res->sr_bit_offset, 2171 res->sr_bit_offset,
2150 res->sr_bits); 2172 res->sr_bits);
2151 if (ret < 0) { 2173 if (ret < 0) {
2174 ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
2175 ac->ac_bh, res->sr_bits, chain);
2152 mlog_errno(ret); 2176 mlog_errno(ret);
2153 goto out; 2177 goto out;
2154 } 2178 }
@@ -2870,6 +2894,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2870 status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); 2894 status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
2871 if (status < 0) { 2895 if (status < 0) {
2872 mutex_unlock(&inode_alloc_inode->i_mutex); 2896 mutex_unlock(&inode_alloc_inode->i_mutex);
2897 iput(inode_alloc_inode);
2873 mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", 2898 mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
2874 (u32)suballoc_slot, status); 2899 (u32)suballoc_slot, status);
2875 goto bail; 2900 goto bail;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 218d8036b3e7..2d2501767c0c 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -91,6 +91,10 @@ int ocfs2_alloc_dinode_update_counts(struct inode *inode,
91 struct buffer_head *di_bh, 91 struct buffer_head *di_bh,
92 u32 num_bits, 92 u32 num_bits,
93 u16 chain); 93 u16 chain);
94void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
95 struct buffer_head *di_bh,
96 u32 num_bits,
97 u16 chain);
94int ocfs2_block_group_set_bits(handle_t *handle, 98int ocfs2_block_group_set_bits(handle_t *handle,
95 struct inode *alloc_inode, 99 struct inode *alloc_inode,
96 struct ocfs2_group_desc *bg, 100 struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 49d84f80f36c..a7cdd56f4c79 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
561 if (!oi) 561 if (!oi)
562 return NULL; 562 return NULL;
563 563
564 oi->i_sync_tid = 0;
565 oi->i_datasync_tid = 0;
566
564 jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); 567 jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
565 return &oi->vfs_inode; 568 return &oi->vfs_inode;
566} 569}
@@ -631,6 +634,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
631 struct ocfs2_super *osb = OCFS2_SB(sb); 634 struct ocfs2_super *osb = OCFS2_SB(sb);
632 u32 tmp; 635 u32 tmp;
633 636
637 sync_filesystem(sb);
638
634 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || 639 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
635 !ocfs2_check_set_options(sb, &parsed_options)) { 640 !ocfs2_check_set_options(sb, &parsed_options)) {
636 ret = -EINVAL; 641 ret = -EINVAL;
@@ -1238,30 +1243,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
1238 return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); 1243 return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
1239} 1244}
1240 1245
1241static void ocfs2_kill_sb(struct super_block *sb)
1242{
1243 struct ocfs2_super *osb = OCFS2_SB(sb);
1244
1245 /* Failed mount? */
1246 if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED)
1247 goto out;
1248
1249 /* Prevent further queueing of inode drop events */
1250 spin_lock(&dentry_list_lock);
1251 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1252 spin_unlock(&dentry_list_lock);
1253 /* Wait for work to finish and/or remove it */
1254 cancel_work_sync(&osb->dentry_lock_work);
1255out:
1256 kill_block_super(sb);
1257}
1258
1259static struct file_system_type ocfs2_fs_type = { 1246static struct file_system_type ocfs2_fs_type = {
1260 .owner = THIS_MODULE, 1247 .owner = THIS_MODULE,
1261 .name = "ocfs2", 1248 .name = "ocfs2",
1262 .mount = ocfs2_mount, 1249 .mount = ocfs2_mount,
1263 .kill_sb = ocfs2_kill_sb, 1250 .kill_sb = kill_block_super,
1264
1265 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1251 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1266 .next = NULL 1252 .next = NULL
1267}; 1253};
@@ -1612,14 +1598,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
1612 return 0; 1598 return 0;
1613} 1599}
1614 1600
1615wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];
1616
1617static int __init ocfs2_init(void) 1601static int __init ocfs2_init(void)
1618{ 1602{
1619 int status, i; 1603 int status;
1620
1621 for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
1622 init_waitqueue_head(&ocfs2__ioend_wq[i]);
1623 1604
1624 status = init_ocfs2_uptodate_cache(); 1605 status = init_ocfs2_uptodate_cache();
1625 if (status < 0) 1606 if (status < 0)
@@ -1761,7 +1742,7 @@ static void ocfs2_inode_init_once(void *data)
1761 ocfs2_extent_map_init(&oi->vfs_inode); 1742 ocfs2_extent_map_init(&oi->vfs_inode);
1762 INIT_LIST_HEAD(&oi->ip_io_markers); 1743 INIT_LIST_HEAD(&oi->ip_io_markers);
1763 oi->ip_dir_start_lookup = 0; 1744 oi->ip_dir_start_lookup = 0;
1764 atomic_set(&oi->ip_unaligned_aio, 0); 1745 mutex_init(&oi->ip_unaligned_aio);
1765 init_rwsem(&oi->ip_alloc_sem); 1746 init_rwsem(&oi->ip_alloc_sem);
1766 init_rwsem(&oi->ip_xattr_sem); 1747 init_rwsem(&oi->ip_xattr_sem);
1767 mutex_init(&oi->ip_io_mutex); 1748 mutex_init(&oi->ip_io_mutex);
@@ -1932,17 +1913,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1932 1913
1933 debugfs_remove(osb->osb_ctxt); 1914 debugfs_remove(osb->osb_ctxt);
1934 1915
1935 /*
1936 * Flush inode dropping work queue so that deletes are
1937 * performed while the filesystem is still working
1938 */
1939 ocfs2_drop_all_dl_inodes(osb);
1940
1941 /* Orphan scan should be stopped as early as possible */ 1916 /* Orphan scan should be stopped as early as possible */
1942 ocfs2_orphan_scan_stop(osb); 1917 ocfs2_orphan_scan_stop(osb);
1943 1918
1944 ocfs2_disable_quotas(osb); 1919 ocfs2_disable_quotas(osb);
1945 1920
1921 /* All dquots should be freed by now */
1922 WARN_ON(!llist_empty(&osb->dquot_drop_list));
1923 /* Wait for worker to be done with the work structure in osb */
1924 cancel_work_sync(&osb->dquot_drop_work);
1925
1946 ocfs2_shutdown_local_alloc(osb); 1926 ocfs2_shutdown_local_alloc(osb);
1947 1927
1948 /* This will disable recovery and flush any recovery work. */ 1928 /* This will disable recovery and flush any recovery work. */
@@ -2077,7 +2057,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2077 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 2057 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2078 struct inode *inode = NULL; 2058 struct inode *inode = NULL;
2079 struct ocfs2_journal *journal; 2059 struct ocfs2_journal *journal;
2080 __le32 uuid_net_key;
2081 struct ocfs2_super *osb; 2060 struct ocfs2_super *osb;
2082 u64 total_blocks; 2061 u64 total_blocks;
2083 2062
@@ -2123,6 +2102,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2123 spin_lock_init(&osb->osb_xattr_lock); 2102 spin_lock_init(&osb->osb_xattr_lock);
2124 ocfs2_init_steal_slots(osb); 2103 ocfs2_init_steal_slots(osb);
2125 2104
2105 mutex_init(&osb->system_file_mutex);
2106
2126 atomic_set(&osb->alloc_stats.moves, 0); 2107 atomic_set(&osb->alloc_stats.moves, 0);
2127 atomic_set(&osb->alloc_stats.local_data, 0); 2108 atomic_set(&osb->alloc_stats.local_data, 0);
2128 atomic_set(&osb->alloc_stats.bitmap_data, 0); 2109 atomic_set(&osb->alloc_stats.bitmap_data, 0);
@@ -2276,8 +2257,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2276 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); 2257 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
2277 journal->j_state = OCFS2_JOURNAL_FREE; 2258 journal->j_state = OCFS2_JOURNAL_FREE;
2278 2259
2279 INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); 2260 INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
2280 osb->dentry_lock_list = NULL; 2261 init_llist_head(&osb->dquot_drop_list);
2281 2262
2282 /* get some pseudo constants for clustersize bits */ 2263 /* get some pseudo constants for clustersize bits */
2283 osb->s_clustersize_bits = 2264 osb->s_clustersize_bits =
@@ -2311,8 +2292,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2311 goto bail; 2292 goto bail;
2312 } 2293 }
2313 2294
2314 memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
2315
2316 strncpy(osb->vol_label, di->id2.i_super.s_label, 63); 2295 strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
2317 osb->vol_label[63] = '\0'; 2296 osb->vol_label[63] = '\0';
2318 osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); 2297 osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index f053688d22a3..af155c183123 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -113,9 +113,11 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
113 } else 113 } else
114 arr = get_local_system_inode(osb, type, slot); 114 arr = get_local_system_inode(osb, type, slot);
115 115
116 mutex_lock(&osb->system_file_mutex);
116 if (arr && ((inode = *arr) != NULL)) { 117 if (arr && ((inode = *arr) != NULL)) {
117 /* get a ref in addition to the array ref */ 118 /* get a ref in addition to the array ref */
118 inode = igrab(inode); 119 inode = igrab(inode);
120 mutex_unlock(&osb->system_file_mutex);
119 BUG_ON(!inode); 121 BUG_ON(!inode);
120 122
121 return inode; 123 return inode;
@@ -129,6 +131,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
129 *arr = igrab(inode); 131 *arr = igrab(inode);
130 BUG_ON(!*arr); 132 BUG_ON(!*arr);
131 } 133 }
134 mutex_unlock(&osb->system_file_mutex);
132 return inode; 135 return inode;
133} 136}
134 137
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 185fa3b7f962..016f01df3825 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -369,7 +369,7 @@ static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
369 * them fully. 369 * them fully.
370 */ 370 */
371static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, 371static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
372 u64 xb_blkno) 372 u64 xb_blkno, int new)
373{ 373{
374 int i, rc = 0; 374 int i, rc = 0;
375 375
@@ -383,9 +383,16 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
383 } 383 }
384 384
385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 385 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
386 bucket->bu_bhs[i])) 386 bucket->bu_bhs[i])) {
387 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), 387 if (new)
388 bucket->bu_bhs[i]); 388 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
389 bucket->bu_bhs[i]);
390 else {
391 set_buffer_uptodate(bucket->bu_bhs[i]);
392 ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
393 bucket->bu_bhs[i]);
394 }
395 }
389 } 396 }
390 397
391 if (rc) 398 if (rc)
@@ -2602,6 +2609,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2602 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); 2609 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2603 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2610 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2604 spin_unlock(&oi->ip_lock); 2611 spin_unlock(&oi->ip_lock);
2612 ocfs2_update_inode_fsync_trans(handle, inode, 0);
2605 2613
2606 ocfs2_journal_dirty(handle, di_bh); 2614 ocfs2_journal_dirty(handle, di_bh);
2607out_commit: 2615out_commit:
@@ -3200,8 +3208,15 @@ meta_guess:
3200 clusters_add += 1; 3208 clusters_add += 1;
3201 } 3209 }
3202 } else { 3210 } else {
3203 meta_add += 1;
3204 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; 3211 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3212 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3213 struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3214 meta_add += ocfs2_extend_meta_needed(el);
3215 credits += ocfs2_calc_extend_credits(inode->i_sb,
3216 el);
3217 } else {
3218 meta_add += 1;
3219 }
3205 } 3220 }
3206out: 3221out:
3207 if (clusters_need) 3222 if (clusters_need)
@@ -3614,6 +3629,7 @@ int ocfs2_xattr_set(struct inode *inode,
3614 } 3629 }
3615 3630
3616 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); 3631 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3632 ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3617 3633
3618 ocfs2_commit_trans(osb, ctxt.handle); 3634 ocfs2_commit_trans(osb, ctxt.handle);
3619 3635
@@ -4294,7 +4310,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4294 4310
4295 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); 4311 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4296 4312
4297 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); 4313 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4298 if (ret) { 4314 if (ret) {
4299 mlog_errno(ret); 4315 mlog_errno(ret);
4300 goto out; 4316 goto out;
@@ -4638,7 +4654,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
4638 * Even if !new_bucket_head, we're overwriting t_bucket. Thus, 4654 * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
4639 * there's no need to read it. 4655 * there's no need to read it.
4640 */ 4656 */
4641 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); 4657 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4642 if (ret) { 4658 if (ret) {
4643 mlog_errno(ret); 4659 mlog_errno(ret);
4644 goto out; 4660 goto out;
@@ -4804,7 +4820,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
4804 * Even if !t_is_new, we're overwriting t_bucket. Thus, 4820 * Even if !t_is_new, we're overwriting t_bucket. Thus,
4805 * there's no need to read it. 4821 * there's no need to read it.
4806 */ 4822 */
4807 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); 4823 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4808 if (ret) 4824 if (ret)
4809 goto out; 4825 goto out;
4810 4826
@@ -5476,6 +5492,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5476 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5492 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5477 if (ret) 5493 if (ret)
5478 mlog_errno(ret); 5494 mlog_errno(ret);
5495 ocfs2_update_inode_fsync_trans(handle, inode, 0);
5479 5496
5480out_commit: 5497out_commit:
5481 ocfs2_commit_trans(osb, handle); 5498 ocfs2_commit_trans(osb, handle);
@@ -6830,7 +6847,7 @@ static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6830 break; 6847 break;
6831 } 6848 }
6832 6849
6833 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); 6850 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
6834 if (ret) { 6851 if (ret) {
6835 mlog_errno(ret); 6852 mlog_errno(ret);
6836 break; 6853 break;