aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2012-03-21 19:34:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-21 20:54:58 -0400
commita05b0855fd15504972dba2358e5faa172a1e50ba (patch)
treef4cba4ef12d888ec472714f67ef7bcec318a5191 /fs
parentf5bf18fa22f8c41a13eb8762c7373eb3a93a7333 (diff)
hugetlbfs: avoid taking i_mutex from hugetlbfs_read()
Taking i_mutex in hugetlbfs_read() can result in deadlock with mmap as explained below Thread A: read() on hugetlbfs hugetlbfs_read() called i_mutex grabbed hugetlbfs_read_actor() called __copy_to_user() called page fault is triggered Thread B, sharing address space with A: mmap() the same file ->mmap_sem is grabbed on task_B->mm->mmap_sem hugetlbfs_file_mmap() is called attempt to grab ->i_mutex and block waiting for A to give it up Thread A: pagefault handled blocked on attempt to grab task_A->mm->mmap_sem, which happens to be the same thing as task_B->mm->mmap_sem. Block waiting for B to give it up. AFAIU the i_mutex locking was added to hugetlbfs_read() as per http://lkml.indiana.edu/hypermail/linux/kernel/0707.2/3066.html to take care of the race between truncate and read. This patch fixes this by looking at page->mapping under lock_page() (find_lock_page()) to ensure that the inode didn't get truncated in the range during a parallel read. Ideally we can extend the patch to make sure we don't increase i_size in mmap. But that will break userspace, because applications will now have to use truncate(2) to increase i_size in hugetlbfs. Based on the original patch from Hillf Danton. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Hillf Danton <dhillf@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Hugh Dickins <hughd@google.com> Cc: <stable@kernel.org> [everything after 2007 :)] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/hugetlbfs/inode.c25
1 files changed, 9 insertions, 16 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b7bc7868c7b5..19654cfe780b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -245,17 +245,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
245 loff_t isize; 245 loff_t isize;
246 ssize_t retval = 0; 246 ssize_t retval = 0;
247 247
248 mutex_lock(&inode->i_mutex);
249
250 /* validate length */ 248 /* validate length */
251 if (len == 0) 249 if (len == 0)
252 goto out; 250 goto out;
253 251
254 isize = i_size_read(inode);
255 if (!isize)
256 goto out;
257
258 end_index = (isize - 1) >> huge_page_shift(h);
259 for (;;) { 252 for (;;) {
260 struct page *page; 253 struct page *page;
261 unsigned long nr, ret; 254 unsigned long nr, ret;
@@ -263,18 +256,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
263 256
264 /* nr is the maximum number of bytes to copy from this page */ 257 /* nr is the maximum number of bytes to copy from this page */
265 nr = huge_page_size(h); 258 nr = huge_page_size(h);
259 isize = i_size_read(inode);
260 if (!isize)
261 goto out;
262 end_index = (isize - 1) >> huge_page_shift(h);
266 if (index >= end_index) { 263 if (index >= end_index) {
267 if (index > end_index) 264 if (index > end_index)
268 goto out; 265 goto out;
269 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 266 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
270 if (nr <= offset) { 267 if (nr <= offset)
271 goto out; 268 goto out;
272 }
273 } 269 }
274 nr = nr - offset; 270 nr = nr - offset;
275 271
276 /* Find the page */ 272 /* Find the page */
277 page = find_get_page(mapping, index); 273 page = find_lock_page(mapping, index);
278 if (unlikely(page == NULL)) { 274 if (unlikely(page == NULL)) {
279 /* 275 /*
280 * We have a HOLE, zero out the user-buffer for the 276 * We have a HOLE, zero out the user-buffer for the
@@ -286,17 +282,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
286 else 282 else
287 ra = 0; 283 ra = 0;
288 } else { 284 } else {
285 unlock_page(page);
286
289 /* 287 /*
290 * We have the page, copy it to user space buffer. 288 * We have the page, copy it to user space buffer.
291 */ 289 */
292 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 290 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
293 ret = ra; 291 ret = ra;
292 page_cache_release(page);
294 } 293 }
295 if (ra < 0) { 294 if (ra < 0) {
296 if (retval == 0) 295 if (retval == 0)
297 retval = ra; 296 retval = ra;
298 if (page)
299 page_cache_release(page);
300 goto out; 297 goto out;
301 } 298 }
302 299
@@ -306,16 +303,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
306 index += offset >> huge_page_shift(h); 303 index += offset >> huge_page_shift(h);
307 offset &= ~huge_page_mask(h); 304 offset &= ~huge_page_mask(h);
308 305
309 if (page)
310 page_cache_release(page);
311
312 /* short read or no more work */ 306 /* short read or no more work */
313 if ((ret != nr) || (len == 0)) 307 if ((ret != nr) || (len == 0))
314 break; 308 break;
315 } 309 }
316out: 310out:
317 *ppos = ((loff_t)index << huge_page_shift(h)) + offset; 311 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
318 mutex_unlock(&inode->i_mutex);
319 return retval; 312 return retval;
320} 313}
321 314