author     Christoph Hellwig <hch@infradead.org>   2011-06-24 14:29:43 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>       2011-07-20 20:47:46 -0400
commit     bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch)
tree       ef5341c7747f809aec7ae233f6e3ef90af39be5f /mm
parent     f9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff)
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate.

Replace it with a hand-grown construct:

 - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away
 - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero
 - when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_flags
 - new references to i_dio_count can't appear while we are waiting for it to read zero, because starting a new direct I/O operation (which is what increments the count) always requires i_mutex or an equivalent like XFS's i_iolock

This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
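For reference, here is a minimal sketch of the counting scheme described above. It is an illustration based only on this commit message, not code from the patch (the fs/-side helpers are outside this mm-limited diff): the sketch_ prefixed helpers, the __I_DIO_WAKEUP bit number, and the atomic_t type of i_dio_count are assumptions.

#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/atomic.h>

#define __I_DIO_WAKEUP	9	/* assumed number for the "new bit in i_flags" */

/* DIO submission: always runs under i_mutex (or an equivalent like XFS's i_iolock). */
static inline void sketch_inode_dio_begin(struct inode *inode)
{
	atomic_inc(&inode->i_dio_count);
}

/* DIO completion: may run from a non-owner context, so it only decrements and wakes. */
static inline void sketch_inode_dio_done(struct inode *inode)
{
	if (atomic_dec_and_test(&inode->i_dio_count))
		wake_up_bit(&inode->i_flags, __I_DIO_WAKEUP);
}

/*
 * Truncate side: called with i_mutex held, so no new direct I/O can start
 * while we sleep waiting for i_dio_count to drain to zero.
 */
static void sketch_inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_flags, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_flags, __I_DIO_WAKEUP);

	while (atomic_read(&inode->i_dio_count)) {
		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
		finish_wait(wq, &q.wait);
	}
}

With something like this in place, the truncate.c hunk below only needs to swap the down_write()/up_write() pair on i_alloc_sem for a single inode_dio_wait() call made under i_mutex.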
Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c    3
-rw-r--r--   mm/madvise.c    2
-rw-r--r--   mm/rmap.c       1
-rw-r--r--   mm/truncate.c   3
4 files changed, 2 insertions, 7 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d345..f820e600f1a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -78,9 +78,6 @@
  *  ->i_mutex			(generic_file_buffered_write)
  *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
  *
- *  ->i_mutex
- *    ->i_alloc_sem		(various)
- *
  *  inode_wb_list_lock
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
diff --git a/mm/madvise.c b/mm/madvise.c
index 2221491ed50..74bf193eff0 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -218,7 +218,7 @@ static long madvise_remove(struct vm_area_struct *vma,
 	endoff = (loff_t)(end - vma->vm_start - 1)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+	/* vmtruncate_range needs to take i_mutex */
 	up_read(&current->mm->mmap_sem);
 	error = vmtruncate_range(mapping->host, offset, endoff);
 	down_read(&current->mm->mmap_sem);
diff --git a/mm/rmap.c b/mm/rmap.c
index 23295f65ae4..2540a39eea4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -21,7 +21,6 @@
  * Lock ordering in mm:
  *
  * inode->i_mutex	(while writing or truncating, not reading or faulting)
- *   inode->i_alloc_sem (vmtruncate_range)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
diff --git a/mm/truncate.c b/mm/truncate.c
index e13f22efaad..003c6c685fc 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -622,12 +622,11 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 		return -ENOSYS;
 
 	mutex_lock(&inode->i_mutex);
-	down_write(&inode->i_alloc_sem);
+	inode_dio_wait(inode);
 	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	inode->i_op->truncate_range(inode, offset, end);
 	/* unmap again to remove racily COWed private pages */
 	unmap_mapping_range(mapping, offset, (end - offset), 1);
-	up_write(&inode->i_alloc_sem);
 	mutex_unlock(&inode->i_mutex);
 
 	return 0;