about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2011-10-31 10:56:32 -0400
committer Theodore Ts'o <tytso@mit.edu> 2011-10-31 10:56:32 -0400
commit b82e384c7bb9a19036b4daf58fa216df7cd48aa0 (patch)
tree 42bde122000b3bf3adf7eaa0328e0fdafdb3b5fd /fs/ext4
parent 4e298021216727cc27017c5032ade86167c66256 (diff)
ext4: optimize locking for end_io extent conversion
Now that we are doing the locking correctly, we need to grab the i_completed_io_lock twice per end_io. We can clean this up by removing the end_io structure from the i_completed_io_list before processing it, and using that removal as the locking mechanism to prevent ext4_flush_completed_IO() racing against ext4_end_io_work(), instead of clearing the EXT4_IO_END_UNWRITTEN flag in io->flag.

In addition, if ext4_convert_unwritten_extents() returns an error, we no longer keep the end_io structure on the linked list. Keeping it there doesn't help, because it tends to lock up the file system and wedge the system. That's one way to call attention to the problem, but it doesn't help the overall robustness of the system.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/fsync.c 5
-rw-r--r-- fs/ext4/page-io.c 37
2 files changed, 13 insertions, 29 deletions
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 851ac5b3cec..00a2cb753ef 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,6 +88,7 @@ int ext4_flush_completed_IO(struct inode *inode)
88 while (!list_empty(&ei->i_completed_io_list)){ 88 while (!list_empty(&ei->i_completed_io_list)){
89 io = list_entry(ei->i_completed_io_list.next, 89 io = list_entry(ei->i_completed_io_list.next,
90 ext4_io_end_t, list); 90 ext4_io_end_t, list);
91 list_del_init(&io->list);
91 /* 92 /*
92 * Calling ext4_end_io_nolock() to convert completed 93 * Calling ext4_end_io_nolock() to convert completed
93 * IO to written. 94 * IO to written.
@@ -104,11 +105,9 @@ int ext4_flush_completed_IO(struct inode *inode)
104 */ 105 */
105 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 106 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
106 ret = ext4_end_io_nolock(io); 107 ret = ext4_end_io_nolock(io);
107 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
108 if (ret < 0) 108 if (ret < 0)
109 ret2 = ret; 109 ret2 = ret;
110 else 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 list_del_init(&io->list);
112 } 111 }
113 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 112 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
114 return (ret2 < 0) ? ret2 : 0; 113 return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4fa1d709b60..9eebf44646f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -99,28 +99,21 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
99 "list->prev 0x%p\n", 99 "list->prev 0x%p\n",
100 io, inode->i_ino, io->list.next, io->list.prev); 100 io, inode->i_ino, io->list.next, io->list.prev);
101 101
102 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
103 return ret;
104
105 ret = ext4_convert_unwritten_extents(inode, offset, size); 102 ret = ext4_convert_unwritten_extents(inode, offset, size);
106 if (ret < 0) { 103 if (ret < 0) {
107 printk(KERN_EMERG "%s: failed to convert unwritten " 104 ext4_msg(inode->i_sb, KERN_EMERG,
108 "extents to written extents, error is %d " 105 "failed to convert unwritten extents to written "
109 "io is still on inode %lu aio dio list\n", 106 "extents -- potential data loss! "
110 __func__, ret, inode->i_ino); 107 "(inode %lu, offset %llu, size %zd, error %d)",
111 return ret; 108 inode->i_ino, offset, size, ret);
112 } 109 }
113 110
114 if (io->iocb) 111 if (io->iocb)
115 aio_complete(io->iocb, io->result, 0); 112 aio_complete(io->iocb, io->result, 0);
116 /* clear the DIO AIO unwritten flag */
117 if (io->flag & EXT4_IO_END_UNWRITTEN) {
118 io->flag &= ~EXT4_IO_END_UNWRITTEN;
119 /* Wake up anyone waiting on unwritten extent conversion */
120 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
121 wake_up_all(ext4_ioend_wq(io->inode));
122 }
123 113
114 /* Wake up anyone waiting on unwritten extent conversion */
115 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
116 wake_up_all(ext4_ioend_wq(io->inode));
124 return ret; 117 return ret;
125} 118}
126 119
@@ -133,16 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
133 struct inode *inode = io->inode; 126 struct inode *inode = io->inode;
134 struct ext4_inode_info *ei = EXT4_I(inode); 127 struct ext4_inode_info *ei = EXT4_I(inode);
135 unsigned long flags; 128 unsigned long flags;
136 int ret;
137 129
138 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
139 if (list_empty(&io->list)) { 131 if (list_empty(&io->list)) {
140 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 132 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
141 goto free; 133 goto free;
142 } 134 }
143 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
144 135
145 if (!mutex_trylock(&inode->i_mutex)) { 136 if (!mutex_trylock(&inode->i_mutex)) {
137 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
146 /* 138 /*
147 * Requeue the work instead of waiting so that the work 139 * Requeue the work instead of waiting so that the work
148 * items queued after this can be processed. 140 * items queued after this can be processed.
@@ -159,16 +151,9 @@ static void ext4_end_io_work(struct work_struct *work)
159 io->flag |= EXT4_IO_END_QUEUED; 151 io->flag |= EXT4_IO_END_QUEUED;
160 return; 152 return;
161 } 153 }
162 ret = ext4_end_io_nolock(io); 154 list_del_init(&io->list);
163 if (ret < 0) {
164 mutex_unlock(&inode->i_mutex);
165 return;
166 }
167
168 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
169 if (!list_empty(&io->list))
170 list_del_init(&io->list);
171 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
156 (void) ext4_end_io_nolock(io);
172 mutex_unlock(&inode->i_mutex); 157 mutex_unlock(&inode->i_mutex);
173free: 158free:
174 ext4_free_io_end(io); 159 ext4_free_io_end(io);