diff options
author | Josef Bacik <jbacik@fb.com> | 2017-07-24 15:14:25 -0400 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2017-08-16 10:12:05 -0400 |
commit | 23b5ec74943f44378b68c0edd8e210a86318ea5e (patch) | |
tree | 07fc0067812f384350e17660072d864a0a3eec87 /fs/btrfs/inode.c | |
parent | 8d8aafeea23e2d641460d7e6231361f0322ac058 (diff) |
btrfs: fix readdir deadlock with pagefault
Readdir does dir_emit while under the btree lock. dir_emit can trigger
a page fault, which means we can deadlock. Fix this by allocating a
buffer on opening a directory, copying the readdir entries into this
buffer, and doing dir_emit from outside of the tree lock.
Thread A
readdir <holding tree lock>
  dir_emit
    <page fault>
      down_read(mmap_sem)
Thread B
mmap write
  down_write(mmap_sem)
    page_mkwrite
      wait_ordered_extents
Process C
finish_ordered_extent
  insert_reserved_file_extent
    try to lock leaf <hang>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ copy the deadlock scenario to changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 108 |
1 files changed, 82 insertions, 26 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a17a61e2ff9d..fa4b2563dfd7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5876,25 +5876,74 @@ unsigned char btrfs_filetype_table[] = { | |||
5876 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 5876 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
5877 | }; | 5877 | }; |
5878 | 5878 | ||
5879 | /* | ||
5880 | * All this infrastructure exists because dir_emit can fault, and we are holding | ||
5881 | * the tree lock when doing readdir. For now just allocate a buffer and copy | ||
5882 | * our information into that, and then dir_emit from the buffer. This is | ||
5883 | * similar to what NFS does, only we don't keep the buffer around in pagecache | ||
5884 | * because I'm afraid I'll mess that up. Long term we need to make filldir do | ||
5885 | * copy_to_user_inatomic so we don't have to worry about page faulting under the | ||
5886 | * tree lock. | ||
5887 | */ | ||
5888 | static int btrfs_opendir(struct inode *inode, struct file *file) | ||
5889 | { | ||
5890 | struct btrfs_file_private *private; | ||
5891 | |||
5892 | private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); | ||
5893 | if (!private) | ||
5894 | return -ENOMEM; | ||
5895 | private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
5896 | if (!private->filldir_buf) { | ||
5897 | kfree(private); | ||
5898 | return -ENOMEM; | ||
5899 | } | ||
5900 | file->private_data = private; | ||
5901 | return 0; | ||
5902 | } | ||
5903 | |||
5904 | struct dir_entry { | ||
5905 | u64 ino; | ||
5906 | u64 offset; | ||
5907 | unsigned type; | ||
5908 | int name_len; | ||
5909 | }; | ||
5910 | |||
5911 | static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) | ||
5912 | { | ||
5913 | while (entries--) { | ||
5914 | struct dir_entry *entry = addr; | ||
5915 | char *name = (char *)(entry + 1); | ||
5916 | |||
5917 | ctx->pos = entry->offset; | ||
5918 | if (!dir_emit(ctx, name, entry->name_len, entry->ino, | ||
5919 | entry->type)) | ||
5920 | return 1; | ||
5921 | addr += sizeof(struct dir_entry) + entry->name_len; | ||
5922 | ctx->pos++; | ||
5923 | } | ||
5924 | return 0; | ||
5925 | } | ||
5926 | |||
5879 | static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | 5927 | static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) |
5880 | { | 5928 | { |
5881 | struct inode *inode = file_inode(file); | 5929 | struct inode *inode = file_inode(file); |
5882 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 5930 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
5883 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5931 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5932 | struct btrfs_file_private *private = file->private_data; | ||
5884 | struct btrfs_dir_item *di; | 5933 | struct btrfs_dir_item *di; |
5885 | struct btrfs_key key; | 5934 | struct btrfs_key key; |
5886 | struct btrfs_key found_key; | 5935 | struct btrfs_key found_key; |
5887 | struct btrfs_path *path; | 5936 | struct btrfs_path *path; |
5937 | void *addr; | ||
5888 | struct list_head ins_list; | 5938 | struct list_head ins_list; |
5889 | struct list_head del_list; | 5939 | struct list_head del_list; |
5890 | int ret; | 5940 | int ret; |
5891 | struct extent_buffer *leaf; | 5941 | struct extent_buffer *leaf; |
5892 | int slot; | 5942 | int slot; |
5893 | unsigned char d_type; | ||
5894 | int over = 0; | ||
5895 | char tmp_name[32]; | ||
5896 | char *name_ptr; | 5943 | char *name_ptr; |
5897 | int name_len; | 5944 | int name_len; |
5945 | int entries = 0; | ||
5946 | int total_len = 0; | ||
5898 | bool put = false; | 5947 | bool put = false; |
5899 | struct btrfs_key location; | 5948 | struct btrfs_key location; |
5900 | 5949 | ||
@@ -5905,12 +5954,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5905 | if (!path) | 5954 | if (!path) |
5906 | return -ENOMEM; | 5955 | return -ENOMEM; |
5907 | 5956 | ||
5957 | addr = private->filldir_buf; | ||
5908 | path->reada = READA_FORWARD; | 5958 | path->reada = READA_FORWARD; |
5909 | 5959 | ||
5910 | INIT_LIST_HEAD(&ins_list); | 5960 | INIT_LIST_HEAD(&ins_list); |
5911 | INIT_LIST_HEAD(&del_list); | 5961 | INIT_LIST_HEAD(&del_list); |
5912 | put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); | 5962 | put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); |
5913 | 5963 | ||
5964 | again: | ||
5914 | key.type = BTRFS_DIR_INDEX_KEY; | 5965 | key.type = BTRFS_DIR_INDEX_KEY; |
5915 | key.offset = ctx->pos; | 5966 | key.offset = ctx->pos; |
5916 | key.objectid = btrfs_ino(BTRFS_I(inode)); | 5967 | key.objectid = btrfs_ino(BTRFS_I(inode)); |
@@ -5920,6 +5971,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5920 | goto err; | 5971 | goto err; |
5921 | 5972 | ||
5922 | while (1) { | 5973 | while (1) { |
5974 | struct dir_entry *entry; | ||
5975 | |||
5923 | leaf = path->nodes[0]; | 5976 | leaf = path->nodes[0]; |
5924 | slot = path->slots[0]; | 5977 | slot = path->slots[0]; |
5925 | if (slot >= btrfs_header_nritems(leaf)) { | 5978 | if (slot >= btrfs_header_nritems(leaf)) { |
@@ -5941,41 +5994,43 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5941 | goto next; | 5994 | goto next; |
5942 | if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) | 5995 | if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) |
5943 | goto next; | 5996 | goto next; |
5944 | |||
5945 | ctx->pos = found_key.offset; | ||
5946 | |||
5947 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 5997 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
5948 | if (verify_dir_item(fs_info, leaf, slot, di)) | 5998 | if (verify_dir_item(fs_info, leaf, slot, di)) |
5949 | goto next; | 5999 | goto next; |
5950 | 6000 | ||
5951 | name_len = btrfs_dir_name_len(leaf, di); | 6001 | name_len = btrfs_dir_name_len(leaf, di); |
5952 | if (name_len <= sizeof(tmp_name)) { | 6002 | if ((total_len + sizeof(struct dir_entry) + name_len) >= |
5953 | name_ptr = tmp_name; | 6003 | PAGE_SIZE) { |
5954 | } else { | 6004 | btrfs_release_path(path); |
5955 | name_ptr = kmalloc(name_len, GFP_KERNEL); | 6005 | ret = btrfs_filldir(private->filldir_buf, entries, ctx); |
5956 | if (!name_ptr) { | 6006 | if (ret) |
5957 | ret = -ENOMEM; | 6007 | goto nopos; |
5958 | goto err; | 6008 | addr = private->filldir_buf; |
5959 | } | 6009 | entries = 0; |
6010 | total_len = 0; | ||
6011 | goto again; | ||
5960 | } | 6012 | } |
6013 | |||
6014 | entry = addr; | ||
6015 | entry->name_len = name_len; | ||
6016 | name_ptr = (char *)(entry + 1); | ||
5961 | read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), | 6017 | read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), |
5962 | name_len); | 6018 | name_len); |
5963 | 6019 | entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | |
5964 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | ||
5965 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 6020 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
5966 | 6021 | entry->ino = location.objectid; | |
5967 | over = !dir_emit(ctx, name_ptr, name_len, location.objectid, | 6022 | entry->offset = found_key.offset; |
5968 | d_type); | 6023 | entries++; |
5969 | 6024 | addr += sizeof(struct dir_entry) + name_len; | |
5970 | if (name_ptr != tmp_name) | 6025 | total_len += sizeof(struct dir_entry) + name_len; |
5971 | kfree(name_ptr); | ||
5972 | |||
5973 | if (over) | ||
5974 | goto nopos; | ||
5975 | ctx->pos++; | ||
5976 | next: | 6026 | next: |
5977 | path->slots[0]++; | 6027 | path->slots[0]++; |
5978 | } | 6028 | } |
6029 | btrfs_release_path(path); | ||
6030 | |||
6031 | ret = btrfs_filldir(private->filldir_buf, entries, ctx); | ||
6032 | if (ret) | ||
6033 | goto nopos; | ||
5979 | 6034 | ||
5980 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); | 6035 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); |
5981 | if (ret) | 6036 | if (ret) |
@@ -10779,6 +10834,7 @@ static const struct file_operations btrfs_dir_file_operations = { | |||
10779 | .llseek = generic_file_llseek, | 10834 | .llseek = generic_file_llseek, |
10780 | .read = generic_read_dir, | 10835 | .read = generic_read_dir, |
10781 | .iterate_shared = btrfs_real_readdir, | 10836 | .iterate_shared = btrfs_real_readdir, |
10837 | .open = btrfs_opendir, | ||
10782 | .unlocked_ioctl = btrfs_ioctl, | 10838 | .unlocked_ioctl = btrfs_ioctl, |
10783 | #ifdef CONFIG_COMPAT | 10839 | #ifdef CONFIG_COMPAT |
10784 | .compat_ioctl = btrfs_compat_ioctl, | 10840 | .compat_ioctl = btrfs_compat_ioctl, |