summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2017-07-24 15:14:25 -0400
committerDavid Sterba <dsterba@suse.com>2017-08-16 10:12:05 -0400
commit23b5ec74943f44378b68c0edd8e210a86318ea5e (patch)
tree07fc0067812f384350e17660072d864a0a3eec87 /fs/btrfs/inode.c
parent8d8aafeea23e2d641460d7e6231361f0322ac058 (diff)
btrfs: fix readdir deadlock with pagefault
Readdir does dir_emit while under the btree lock. dir_emit can trigger a page fault, which means we can deadlock. Fix this by allocating a buffer on opening a directory, copying the readdir entries into this buffer, and doing dir_emit from outside of the tree lock.

The deadlock scenario:

Thread A
  readdir <holding tree lock>
    dir_emit
      <page fault>
        down_read(mmap_sem)

Thread B
  mmap write
    down_write(mmap_sem)
      page_mkwrite
        wait_ordered_extents

Process C
  finish_ordered_extent
    insert_reserved_file_extent
      try to lock leaf <hang>

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ copy the deadlock scenario to changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c108
1 files changed, 82 insertions, 26 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a17a61e2ff9d..fa4b2563dfd7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5876,25 +5876,74 @@ unsigned char btrfs_filetype_table[] = {
5876 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 5876 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
5877}; 5877};
5878 5878
5879/*
5880 * All this infrastructure exists because dir_emit can fault, and we are holding
5881 * the tree lock when doing readdir. For now just allocate a buffer and copy
5882 * our information into that, and then dir_emit from the buffer. This is
5883 * similar to what NFS does, only we don't keep the buffer around in pagecache
5884 * because I'm afraid I'll mess that up. Long term we need to make filldir do
5885 * copy_to_user_inatomic so we don't have to worry about page faulting under the
5886 * tree lock.
5887 */
5888static int btrfs_opendir(struct inode *inode, struct file *file)
5889{
5890 struct btrfs_file_private *private;
5891
5892 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5893 if (!private)
5894 return -ENOMEM;
5895 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5896 if (!private->filldir_buf) {
5897 kfree(private);
5898 return -ENOMEM;
5899 }
5900 file->private_data = private;
5901 return 0;
5902}
5903
5904struct dir_entry {
5905 u64 ino;
5906 u64 offset;
5907 unsigned type;
5908 int name_len;
5909};
5910
5911static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5912{
5913 while (entries--) {
5914 struct dir_entry *entry = addr;
5915 char *name = (char *)(entry + 1);
5916
5917 ctx->pos = entry->offset;
5918 if (!dir_emit(ctx, name, entry->name_len, entry->ino,
5919 entry->type))
5920 return 1;
5921 addr += sizeof(struct dir_entry) + entry->name_len;
5922 ctx->pos++;
5923 }
5924 return 0;
5925}
5926
5879static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) 5927static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5880{ 5928{
5881 struct inode *inode = file_inode(file); 5929 struct inode *inode = file_inode(file);
5882 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5930 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5883 struct btrfs_root *root = BTRFS_I(inode)->root; 5931 struct btrfs_root *root = BTRFS_I(inode)->root;
5932 struct btrfs_file_private *private = file->private_data;
5884 struct btrfs_dir_item *di; 5933 struct btrfs_dir_item *di;
5885 struct btrfs_key key; 5934 struct btrfs_key key;
5886 struct btrfs_key found_key; 5935 struct btrfs_key found_key;
5887 struct btrfs_path *path; 5936 struct btrfs_path *path;
5937 void *addr;
5888 struct list_head ins_list; 5938 struct list_head ins_list;
5889 struct list_head del_list; 5939 struct list_head del_list;
5890 int ret; 5940 int ret;
5891 struct extent_buffer *leaf; 5941 struct extent_buffer *leaf;
5892 int slot; 5942 int slot;
5893 unsigned char d_type;
5894 int over = 0;
5895 char tmp_name[32];
5896 char *name_ptr; 5943 char *name_ptr;
5897 int name_len; 5944 int name_len;
5945 int entries = 0;
5946 int total_len = 0;
5898 bool put = false; 5947 bool put = false;
5899 struct btrfs_key location; 5948 struct btrfs_key location;
5900 5949
@@ -5905,12 +5954,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5905 if (!path) 5954 if (!path)
5906 return -ENOMEM; 5955 return -ENOMEM;
5907 5956
5957 addr = private->filldir_buf;
5908 path->reada = READA_FORWARD; 5958 path->reada = READA_FORWARD;
5909 5959
5910 INIT_LIST_HEAD(&ins_list); 5960 INIT_LIST_HEAD(&ins_list);
5911 INIT_LIST_HEAD(&del_list); 5961 INIT_LIST_HEAD(&del_list);
5912 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); 5962 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
5913 5963
5964again:
5914 key.type = BTRFS_DIR_INDEX_KEY; 5965 key.type = BTRFS_DIR_INDEX_KEY;
5915 key.offset = ctx->pos; 5966 key.offset = ctx->pos;
5916 key.objectid = btrfs_ino(BTRFS_I(inode)); 5967 key.objectid = btrfs_ino(BTRFS_I(inode));
@@ -5920,6 +5971,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5920 goto err; 5971 goto err;
5921 5972
5922 while (1) { 5973 while (1) {
5974 struct dir_entry *entry;
5975
5923 leaf = path->nodes[0]; 5976 leaf = path->nodes[0];
5924 slot = path->slots[0]; 5977 slot = path->slots[0];
5925 if (slot >= btrfs_header_nritems(leaf)) { 5978 if (slot >= btrfs_header_nritems(leaf)) {
@@ -5941,41 +5994,43 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5941 goto next; 5994 goto next;
5942 if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) 5995 if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
5943 goto next; 5996 goto next;
5944
5945 ctx->pos = found_key.offset;
5946
5947 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 5997 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
5948 if (verify_dir_item(fs_info, leaf, slot, di)) 5998 if (verify_dir_item(fs_info, leaf, slot, di))
5949 goto next; 5999 goto next;
5950 6000
5951 name_len = btrfs_dir_name_len(leaf, di); 6001 name_len = btrfs_dir_name_len(leaf, di);
5952 if (name_len <= sizeof(tmp_name)) { 6002 if ((total_len + sizeof(struct dir_entry) + name_len) >=
5953 name_ptr = tmp_name; 6003 PAGE_SIZE) {
5954 } else { 6004 btrfs_release_path(path);
5955 name_ptr = kmalloc(name_len, GFP_KERNEL); 6005 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
5956 if (!name_ptr) { 6006 if (ret)
5957 ret = -ENOMEM; 6007 goto nopos;
5958 goto err; 6008 addr = private->filldir_buf;
5959 } 6009 entries = 0;
6010 total_len = 0;
6011 goto again;
5960 } 6012 }
6013
6014 entry = addr;
6015 entry->name_len = name_len;
6016 name_ptr = (char *)(entry + 1);
5961 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), 6017 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
5962 name_len); 6018 name_len);
5963 6019 entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
5964 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
5965 btrfs_dir_item_key_to_cpu(leaf, di, &location); 6020 btrfs_dir_item_key_to_cpu(leaf, di, &location);
5966 6021 entry->ino = location.objectid;
5967 over = !dir_emit(ctx, name_ptr, name_len, location.objectid, 6022 entry->offset = found_key.offset;
5968 d_type); 6023 entries++;
5969 6024 addr += sizeof(struct dir_entry) + name_len;
5970 if (name_ptr != tmp_name) 6025 total_len += sizeof(struct dir_entry) + name_len;
5971 kfree(name_ptr);
5972
5973 if (over)
5974 goto nopos;
5975 ctx->pos++;
5976next: 6026next:
5977 path->slots[0]++; 6027 path->slots[0]++;
5978 } 6028 }
6029 btrfs_release_path(path);
6030
6031 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
6032 if (ret)
6033 goto nopos;
5979 6034
5980 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); 6035 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5981 if (ret) 6036 if (ret)
@@ -10779,6 +10834,7 @@ static const struct file_operations btrfs_dir_file_operations = {
10779 .llseek = generic_file_llseek, 10834 .llseek = generic_file_llseek,
10780 .read = generic_read_dir, 10835 .read = generic_read_dir,
10781 .iterate_shared = btrfs_real_readdir, 10836 .iterate_shared = btrfs_real_readdir,
10837 .open = btrfs_opendir,
10782 .unlocked_ioctl = btrfs_ioctl, 10838 .unlocked_ioctl = btrfs_ioctl,
10783#ifdef CONFIG_COMPAT 10839#ifdef CONFIG_COMPAT
10784 .compat_ioctl = btrfs_compat_ioctl, 10840 .compat_ioctl = btrfs_compat_ioctl,