aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2007-04-19 13:37:44 -0400
committer David Woodhouse <dwmw2@hera.kernel.org> 2007-04-19 13:37:44 -0400
commit 236454dfffb64a95ee01c50a215153f5de61c475 (patch)
tree 605a8256dc83916c4daac5e709117cdd2fab3d64
parent a429e51371eee3c989160c003ee40bc3947c6a76 (diff)
Btrfs: many file_write fixes, inline data
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 35
-rw-r--r-- fs/btrfs/disk-io.c 6
-rw-r--r-- fs/btrfs/extent-tree.c 5
-rw-r--r-- fs/btrfs/file-item.c 1
-rw-r--r-- fs/btrfs/super.c 553
5 files changed, 484 insertions, 116 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8a329d3901a2..f1800008be34 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -73,6 +73,9 @@ struct btrfs_header {
73 (sizeof(struct btrfs_disk_key) + sizeof(u64))) 73 (sizeof(struct btrfs_disk_key) + sizeof(u64)))
74#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) 74#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
75#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) 75#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize))
76#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
77 sizeof(struct btrfs_item) - \
78 sizeof(struct btrfs_file_extent_item))
76 79
77struct buffer_head; 80struct buffer_head;
78/* 81/*
@@ -204,8 +207,12 @@ struct btrfs_root_item {
204 __le32 refs; 207 __le32 refs;
205} __attribute__ ((__packed__)); 208} __attribute__ ((__packed__));
206 209
210#define BTRFS_FILE_EXTENT_REG 0
211#define BTRFS_FILE_EXTENT_INLINE 1
212
207struct btrfs_file_extent_item { 213struct btrfs_file_extent_item {
208 __le64 generation; 214 __le64 generation;
215 u8 type;
209 /* 216 /*
210 * disk space consumed by the extent, checksum blocks are included 217 * disk space consumed by the extent, checksum blocks are included
211 * in these numbers 218 * in these numbers
@@ -862,6 +869,34 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l)
862 return (u8 *)l->items; 869 return (u8 *)l->items;
863} 870}
864 871
872static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e)
873{
874 return e->type;
875}
876static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e,
877 u8 val)
878{
879 e->type = val;
880}
881
882static inline char *btrfs_file_extent_inline_start(struct
883 btrfs_file_extent_item *e)
884{
885 return (char *)(&e->disk_blocknr);
886}
887
888static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
889{
890 return (unsigned long)(&((struct
891 btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize;
892}
893
894static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e)
895{
896 struct btrfs_file_extent_item *fe = NULL;
897 return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr);
898}
899
865static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item 900static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item
866 *e) 901 *e)
867{ 902{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a2a3f529cada..11e17a2f736b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -108,6 +108,12 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh,
108 108
109 int ret; 109 int ret;
110 110
111 if (logical == 0) {
112 bh->b_bdev = NULL;
113 bh->b_blocknr = 0;
114 set_buffer_mapped(bh);
115 return 0;
116 }
111 root = root->fs_info->dev_root; 117 root = root->fs_info->dev_root;
112 ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, 118 ret = radix_tree_gang_lookup(&root->fs_info->dev_radix,
113 (void **)lookup, 119 (void **)lookup,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cb04a70eb7e8..b2faad3e8791 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -111,6 +111,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
111 continue; 111 continue;
112 fi = btrfs_item_ptr(buf_leaf, i, 112 fi = btrfs_item_ptr(buf_leaf, i,
113 struct btrfs_file_extent_item); 113 struct btrfs_file_extent_item);
114 if (btrfs_file_extent_type(fi) ==
115 BTRFS_FILE_EXTENT_INLINE)
116 continue;
114 ret = btrfs_inc_extent_ref(trans, root, 117 ret = btrfs_inc_extent_ref(trans, root,
115 btrfs_file_extent_disk_blocknr(fi), 118 btrfs_file_extent_disk_blocknr(fi),
116 btrfs_file_extent_disk_num_blocks(fi)); 119 btrfs_file_extent_disk_num_blocks(fi));
@@ -539,6 +542,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans,
539 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) 542 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
540 continue; 543 continue;
541 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 544 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
545 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
546 continue;
542 /* 547 /*
543 * FIXME make sure to insert a trans record that 548 * FIXME make sure to insert a trans record that
544 * repeats the snapshot del on crash 549 * repeats the snapshot del on crash
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 8cc3c1d15412..10e4cf08e9ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -34,6 +34,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
34 btrfs_set_file_extent_offset(item, 0); 34 btrfs_set_file_extent_offset(item, 0);
35 btrfs_set_file_extent_num_blocks(item, num_blocks); 35 btrfs_set_file_extent_num_blocks(item, num_blocks);
36 btrfs_set_file_extent_generation(item, trans->transid); 36 btrfs_set_file_extent_generation(item, trans->transid);
37 btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG);
37 btrfs_mark_buffer_dirty(path->nodes[0]); 38 btrfs_mark_buffer_dirty(path->nodes[0]);
38 39
39 btrfs_release_path(root, path); 40 btrfs_release_path(root, path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 583cd87a62e2..1b286bb26f12 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -317,19 +317,22 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
317 break; 317 break;
318 if (btrfs_disk_key_offset(found_key) < inode->i_size) 318 if (btrfs_disk_key_offset(found_key) < inode->i_size)
319 break; 319 break;
320 found_extent = 0;
320 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { 321 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
321 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), 322 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
322 path->slots[0], 323 path->slots[0],
323 struct btrfs_file_extent_item); 324 struct btrfs_file_extent_item);
324 extent_start = btrfs_file_extent_disk_blocknr(fi); 325 if (btrfs_file_extent_type(fi) !=
325 extent_num_blocks = 326 BTRFS_FILE_EXTENT_INLINE) {
326 btrfs_file_extent_disk_num_blocks(fi); 327 extent_start =
327 /* FIXME blocksize != 4096 */ 328 btrfs_file_extent_disk_blocknr(fi);
328 inode->i_blocks -= 329 extent_num_blocks =
329 btrfs_file_extent_num_blocks(fi) << 3; 330 btrfs_file_extent_disk_num_blocks(fi);
330 found_extent = 1; 331 /* FIXME blocksize != 4096 */
331 } else { 332 inode->i_blocks -=
332 found_extent = 0; 333 btrfs_file_extent_num_blocks(fi) << 3;
334 found_extent = 1;
335 }
333 } 336 }
334 ret = btrfs_del_item(trans, root, path); 337 ret = btrfs_del_item(trans, root, path);
335 BUG_ON(ret); 338 BUG_ON(ret);
@@ -1010,9 +1013,9 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1010 u64 extent_start = 0; 1013 u64 extent_start = 0;
1011 u64 extent_end = 0; 1014 u64 extent_end = 0;
1012 u64 objectid = inode->i_ino; 1015 u64 objectid = inode->i_ino;
1016 u32 found_type;
1013 struct btrfs_path *path; 1017 struct btrfs_path *path;
1014 struct btrfs_root *root = BTRFS_I(inode)->root; 1018 struct btrfs_root *root = BTRFS_I(inode)->root;
1015 struct btrfs_trans_handle *trans = NULL;
1016 struct btrfs_file_extent_item *item; 1019 struct btrfs_file_extent_item *item;
1017 struct btrfs_leaf *leaf; 1020 struct btrfs_leaf *leaf;
1018 struct btrfs_disk_key *found_key; 1021 struct btrfs_disk_key *found_key;
@@ -1021,13 +1024,12 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1021 BUG_ON(!path); 1024 BUG_ON(!path);
1022 btrfs_init_path(path); 1025 btrfs_init_path(path);
1023 if (create) { 1026 if (create) {
1024 trans = btrfs_start_transaction(root, 1);
1025 WARN_ON(1); 1027 WARN_ON(1);
1026 } 1028 }
1027 1029
1028 ret = btrfs_lookup_file_extent(trans, root, path, 1030 ret = btrfs_lookup_file_extent(NULL, root, path,
1029 inode->i_ino, 1031 inode->i_ino,
1030 iblock << inode->i_blkbits, create); 1032 iblock << inode->i_blkbits, 0);
1031 if (ret < 0) { 1033 if (ret < 0) {
1032 err = ret; 1034 err = ret;
1033 goto out; 1035 goto out;
@@ -1036,7 +1038,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1036 if (ret != 0) { 1038 if (ret != 0) {
1037 if (path->slots[0] == 0) { 1039 if (path->slots[0] == 0) {
1038 btrfs_release_path(root, path); 1040 btrfs_release_path(root, path);
1039 goto allocate; 1041 goto out;
1040 } 1042 }
1041 path->slots[0]--; 1043 path->slots[0]--;
1042 } 1044 }
@@ -1047,73 +1049,51 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1047 blocknr = btrfs_file_extent_disk_blocknr(item); 1049 blocknr = btrfs_file_extent_disk_blocknr(item);
1048 blocknr += btrfs_file_extent_offset(item); 1050 blocknr += btrfs_file_extent_offset(item);
1049 1051
1050 /* exact match found, use it, FIXME, deal with extents
1051 * other than the page size
1052 */
1053 if (0 && ret == 0) {
1054 err = 0;
1055 if (create &&
1056 btrfs_file_extent_generation(item) != trans->transid) {
1057 struct btrfs_key ins;
1058 ret = btrfs_alloc_extent(trans, root, 1,
1059 blocknr, (u64)-1, &ins);
1060 BUG_ON(ret);
1061 btrfs_set_file_extent_disk_blocknr(item, ins.objectid);
1062 mark_buffer_dirty(path->nodes[0]);
1063 ret = btrfs_free_extent(trans, root,
1064 blocknr, 1, 0);
1065 BUG_ON(ret);
1066 blocknr = ins.objectid;
1067
1068 }
1069 btrfs_map_bh_to_logical(root, result, blocknr);
1070 goto out;
1071 }
1072
1073 /* are we inside the extent that was found? */ 1052 /* are we inside the extent that was found? */
1074 found_key = &leaf->items[path->slots[0]].key; 1053 found_key = &leaf->items[path->slots[0]].key;
1054 found_type = btrfs_disk_key_type(found_key);
1075 if (btrfs_disk_key_objectid(found_key) != objectid || 1055 if (btrfs_disk_key_objectid(found_key) != objectid ||
1076 btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { 1056 found_type != BTRFS_EXTENT_DATA_KEY) {
1077 extent_end = 0; 1057 extent_end = 0;
1078 extent_start = 0; 1058 extent_start = 0;
1079 btrfs_release_path(root, path); 1059 btrfs_release_path(root, path);
1080 goto allocate;
1081 }
1082
1083 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1084 extent_start = extent_start >> inode->i_blkbits;
1085 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1086 if (iblock >= extent_start && iblock < extent_end) {
1087 err = 0;
1088 btrfs_map_bh_to_logical(root, result, blocknr + iblock -
1089 extent_start);
1090 goto out;
1091 }
1092allocate:
1093 /* ok, create a new extent */
1094 if (!create) {
1095 err = 0;
1096 goto out; 1060 goto out;
1097 } 1061 }
1098#if 0 1062 found_type = btrfs_file_extent_type(item);
1099 ret = btrfs_alloc_file_extent(trans, root, objectid, 1063 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1100 iblock << inode->i_blkbits, 1064 if (found_type == BTRFS_FILE_EXTENT_REG) {
1101 1, extent_end, &blocknr); 1065 extent_start = extent_start >> inode->i_blkbits;
1102 if (ret) { 1066 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1103 err = ret; 1067 if (iblock >= extent_start && iblock < extent_end) {
1104 goto out; 1068 err = 0;
1069 btrfs_map_bh_to_logical(root, result, blocknr +
1070 iblock - extent_start);
1071 goto out;
1072 }
1073 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1074 char *ptr;
1075 char *map;
1076 u32 size;
1077 size = btrfs_file_extent_inline_len(leaf->items +
1078 path->slots[0]);
1079 extent_end = (extent_start + size) >> inode->i_blkbits;
1080 extent_start >>= inode->i_blkbits;
1081 if (iblock < extent_start || iblock > extent_end) {
1082 goto out;
1083 }
1084 ptr = btrfs_file_extent_inline_start(item);
1085 map = kmap(result->b_page);
1086 memcpy(map, ptr, size);
1087 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1088 flush_dcache_page(result->b_page);
1089 kunmap(result->b_page);
1090 set_buffer_uptodate(result);
1091 SetPageChecked(result->b_page);
1092 btrfs_map_bh_to_logical(root, result, 0);
1105 } 1093 }
1106 inode->i_blocks += inode->i_sb->s_blocksize >> 9;
1107 set_buffer_new(result);
1108 map_bh(result, inode->i_sb, blocknr);
1109
1110 btrfs_map_bh_to_logical(root, result, blocknr);
1111#endif
1112out: 1094out:
1113 btrfs_release_path(root, path); 1095 btrfs_release_path(root, path);
1114 btrfs_free_path(path); 1096 btrfs_free_path(path);
1115 if (trans)
1116 btrfs_end_transaction(trans, root);
1117 return err; 1097 return err;
1118} 1098}
1119 1099
@@ -1124,7 +1104,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock,
1124 struct btrfs_root *root = BTRFS_I(inode)->root; 1104 struct btrfs_root *root = BTRFS_I(inode)->root;
1125 mutex_lock(&root->fs_info->fs_mutex); 1105 mutex_lock(&root->fs_info->fs_mutex);
1126 err = btrfs_get_block_lock(inode, iblock, result, create); 1106 err = btrfs_get_block_lock(inode, iblock, result, create);
1127 // err = btrfs_get_block_inline(inode, iblock, result, create);
1128 mutex_unlock(&root->fs_info->fs_mutex); 1107 mutex_unlock(&root->fs_info->fs_mutex);
1129 return err; 1108 return err;
1130} 1109}
@@ -1134,11 +1113,6 @@ static int btrfs_prepare_write(struct file *file, struct page *page,
1134{ 1113{
1135 return nobh_prepare_write(page, from, to, btrfs_get_block); 1114 return nobh_prepare_write(page, from, to, btrfs_get_block);
1136} 1115}
1137static int btrfs_commit_write(struct file *file, struct page *page,
1138 unsigned from, unsigned to)
1139{
1140 return nobh_commit_write(file, page, from, to);
1141}
1142 1116
1143static void btrfs_write_super(struct super_block *sb) 1117static void btrfs_write_super(struct super_block *sb)
1144{ 1118{
@@ -1150,9 +1124,227 @@ static int btrfs_readpage(struct file *file, struct page *page)
1150 return mpage_readpage(page, btrfs_get_block); 1124 return mpage_readpage(page, btrfs_get_block);
1151} 1125}
1152 1126
1127/*
1128 * While block_write_full_page is writing back the dirty buffers under
1129 * the page lock, whoever dirtied the buffers may decide to clean them
1130 * again at any time. We handle that by only looking at the buffer
1131 * state inside lock_buffer().
1132 *
1133 * If block_write_full_page() is called for regular writeback
1134 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1135 * locked buffer. This only can happen if someone has written the buffer
1136 * directly, with submit_bh(). At the address_space level PageWriteback
1137 * prevents this contention from occurring.
1138 */
1139static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1140 struct writeback_control *wbc)
1141{
1142 int err;
1143 sector_t block;
1144 sector_t last_block;
1145 struct buffer_head *bh, *head;
1146 const unsigned blocksize = 1 << inode->i_blkbits;
1147 int nr_underway = 0;
1148
1149 BUG_ON(!PageLocked(page));
1150
1151 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1152
1153 if (!page_has_buffers(page)) {
1154 create_empty_buffers(page, blocksize,
1155 (1 << BH_Dirty)|(1 << BH_Uptodate));
1156 }
1157
1158 /*
1159 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1160 * here, and the (potentially unmapped) buffers may become dirty at
1161 * any time. If a buffer becomes dirty here after we've inspected it
1162 * then we just miss that fact, and the page stays dirty.
1163 *
1164 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1165 * handle that here by just cleaning them.
1166 */
1167
1168 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1169 head = page_buffers(page);
1170 bh = head;
1171
1172 /*
1173 * Get all the dirty buffers mapped to disk addresses and
1174 * handle any aliases from the underlying blockdev's mapping.
1175 */
1176 do {
1177 if (block > last_block) {
1178 /*
1179 * mapped buffers outside i_size will occur, because
1180 * this page can be outside i_size when there is a
1181 * truncate in progress.
1182 */
1183 /*
1184 * The buffer was zeroed by block_write_full_page()
1185 */
1186 clear_buffer_dirty(bh);
1187 set_buffer_uptodate(bh);
1188 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1189 WARN_ON(bh->b_size != blocksize);
1190 err = btrfs_get_block(inode, block, bh, 0);
1191 if (err)
1192 goto recover;
1193 if (buffer_new(bh)) {
1194 /* blockdev mappings never come here */
1195 clear_buffer_new(bh);
1196 unmap_underlying_metadata(bh->b_bdev,
1197 bh->b_blocknr);
1198 }
1199 }
1200 bh = bh->b_this_page;
1201 block++;
1202 } while (bh != head);
1203
1204 do {
1205 if (!buffer_mapped(bh))
1206 continue;
1207 /*
1208 * If it's a fully non-blocking write attempt and we cannot
1209 * lock the buffer then redirty the page. Note that this can
1210 * potentially cause a busy-wait loop from pdflush and kswapd
1211 * activity, but those code paths have their own higher-level
1212 * throttling.
1213 */
1214 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1215 lock_buffer(bh);
1216 } else if (test_set_buffer_locked(bh)) {
1217 redirty_page_for_writepage(wbc, page);
1218 continue;
1219 }
1220 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1221 mark_buffer_async_write(bh);
1222 } else {
1223 unlock_buffer(bh);
1224 }
1225 } while ((bh = bh->b_this_page) != head);
1226
1227 /*
1228 * The page and its buffers are protected by PageWriteback(), so we can
1229 * drop the bh refcounts early.
1230 */
1231 BUG_ON(PageWriteback(page));
1232 set_page_writeback(page);
1233
1234 do {
1235 struct buffer_head *next = bh->b_this_page;
1236 if (buffer_async_write(bh)) {
1237 submit_bh(WRITE, bh);
1238 nr_underway++;
1239 }
1240 bh = next;
1241 } while (bh != head);
1242 unlock_page(page);
1243
1244 err = 0;
1245done:
1246 if (nr_underway == 0) {
1247 /*
1248 * The page was marked dirty, but the buffers were
1249 * clean. Someone wrote them back by hand with
1250 * ll_rw_block/submit_bh. A rare case.
1251 */
1252 int uptodate = 1;
1253 do {
1254 if (!buffer_uptodate(bh)) {
1255 uptodate = 0;
1256 break;
1257 }
1258 bh = bh->b_this_page;
1259 } while (bh != head);
1260 if (uptodate)
1261 SetPageUptodate(page);
1262 end_page_writeback(page);
1263 /*
1264 * The page and buffer_heads can be released at any time from
1265 * here on.
1266 */
1267 wbc->pages_skipped++; /* We didn't write this page */
1268 }
1269 return err;
1270
1271recover:
1272 /*
1273 * ENOSPC, or some other error. We may already have added some
1274 * blocks to the file, so we need to write these out to avoid
1275 * exposing stale data.
1276 * The page is currently locked and not marked for writeback
1277 */
1278 bh = head;
1279 /* Recovery: lock and submit the mapped buffers */
1280 do {
1281 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1282 lock_buffer(bh);
1283 mark_buffer_async_write(bh);
1284 } else {
1285 /*
1286 * The buffer may have been set dirty during
1287 * attachment to a dirty page.
1288 */
1289 clear_buffer_dirty(bh);
1290 }
1291 } while ((bh = bh->b_this_page) != head);
1292 SetPageError(page);
1293 BUG_ON(PageWriteback(page));
1294 set_page_writeback(page);
1295 do {
1296 struct buffer_head *next = bh->b_this_page;
1297 if (buffer_async_write(bh)) {
1298 clear_buffer_dirty(bh);
1299 submit_bh(WRITE, bh);
1300 nr_underway++;
1301 }
1302 bh = next;
1303 } while (bh != head);
1304 unlock_page(page);
1305 goto done;
1306}
1307
1308/*
1309 * The generic ->writepage function for buffer-backed address_spaces
1310 */
1153static int btrfs_writepage(struct page *page, struct writeback_control *wbc) 1311static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1154{ 1312{
1155 return nobh_writepage(page, btrfs_get_block, wbc); 1313 struct inode * const inode = page->mapping->host;
1314 loff_t i_size = i_size_read(inode);
1315 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1316 unsigned offset;
1317 void *kaddr;
1318
1319 /* Is the page fully inside i_size? */
1320 if (page->index < end_index)
1321 return __btrfs_write_full_page(inode, page, wbc);
1322
1323 /* Is the page fully outside i_size? (truncate in progress) */
1324 offset = i_size & (PAGE_CACHE_SIZE-1);
1325 if (page->index >= end_index+1 || !offset) {
1326 /*
1327 * The page may have dirty, unmapped buffers. For example,
1328 * they may have been added in ext3_writepage(). Make them
1329 * freeable here, so the page does not leak.
1330 */
1331 block_invalidatepage(page, 0);
1332 unlock_page(page);
1333 return 0; /* don't care */
1334 }
1335
1336 /*
1337 * The page straddles i_size. It must be zeroed out on each and every
1338 * writepage invokation because it may be mmapped. "A file is mapped
1339 * in multiples of the page size. For a file that is not a multiple of
1340 * the page size, the remaining memory is zeroed when mapped, and
1341 * writes to that region are not written out to the file."
1342 */
1343 kaddr = kmap_atomic(page, KM_USER0);
1344 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1345 flush_dcache_page(page);
1346 kunmap_atomic(kaddr, KM_USER0);
1347 return __btrfs_write_full_page(inode, page, wbc);
1156} 1348}
1157 1349
1158static void btrfs_truncate(struct inode *inode) 1350static void btrfs_truncate(struct inode *inode)
@@ -1179,6 +1371,29 @@ static void btrfs_truncate(struct inode *inode)
1179 mark_inode_dirty(inode); 1371 mark_inode_dirty(inode);
1180} 1372}
1181 1373
1374/*
1375 * Make sure any changes to nobh_commit_write() are reflected in
1376 * nobh_truncate_page(), since it doesn't call commit_write().
1377 */
1378static int btrfs_commit_write(struct file *file, struct page *page,
1379 unsigned from, unsigned to)
1380{
1381 struct inode *inode = page->mapping->host;
1382 struct buffer_head *bh;
1383 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1384
1385 SetPageUptodate(page);
1386 bh = page_buffers(page);
1387 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1388 set_page_dirty(page);
1389 }
1390 if (pos > inode->i_size) {
1391 i_size_write(inode, pos);
1392 mark_inode_dirty(inode);
1393 }
1394 return 0;
1395}
1396
1182static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, 1397static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1183 struct page **prepared_pages, 1398 struct page **prepared_pages,
1184 const char __user * buf) 1399 const char __user * buf)
@@ -1234,6 +1449,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1234 int ret; 1449 int ret;
1235 int this_write; 1450 int this_write;
1236 struct inode *inode = file->f_path.dentry->d_inode; 1451 struct inode *inode = file->f_path.dentry->d_inode;
1452 struct buffer_head *bh;
1453 struct btrfs_file_extent_item *ei;
1237 1454
1238 for (i = 0; i < num_pages; i++) { 1455 for (i = 0; i < num_pages; i++) {
1239 offset = pos & (PAGE_CACHE_SIZE -1); 1456 offset = pos & (PAGE_CACHE_SIZE -1);
@@ -1242,16 +1459,47 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1242 1459
1243 mutex_lock(&root->fs_info->fs_mutex); 1460 mutex_lock(&root->fs_info->fs_mutex);
1244 trans = btrfs_start_transaction(root, 1); 1461 trans = btrfs_start_transaction(root, 1);
1245 btrfs_csum_file_block(trans, root, inode->i_ino, 1462
1463 bh = page_buffers(pages[i]);
1464 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1465 struct btrfs_key key;
1466 struct btrfs_path *path;
1467 char *ptr;
1468 u32 datasize;
1469
1470 path = btrfs_alloc_path();
1471 BUG_ON(!path);
1472 key.objectid = inode->i_ino;
1473 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1474 key.flags = 0;
1475 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1476 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1477 datasize = offset +
1478 btrfs_file_extent_calc_inline_size(write_bytes);
1479 ret = btrfs_insert_empty_item(trans, root, path, &key,
1480 datasize);
1481 BUG_ON(ret);
1482 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1483 path->slots[0], struct btrfs_file_extent_item);
1484 btrfs_set_file_extent_generation(ei, trans->transid);
1485 btrfs_set_file_extent_type(ei,
1486 BTRFS_FILE_EXTENT_INLINE);
1487 ptr = btrfs_file_extent_inline_start(ei);
1488 memcpy(ptr, bh->b_data, offset + write_bytes);
1489 mark_buffer_dirty(path->nodes[0]);
1490 btrfs_free_path(path);
1491 } else {
1492 btrfs_csum_file_block(trans, root, inode->i_ino,
1246 pages[i]->index << PAGE_CACHE_SHIFT, 1493 pages[i]->index << PAGE_CACHE_SHIFT,
1247 kmap(pages[i]), PAGE_CACHE_SIZE); 1494 kmap(pages[i]), PAGE_CACHE_SIZE);
1248 kunmap(pages[i]); 1495 kunmap(pages[i]);
1496 }
1249 SetPageChecked(pages[i]); 1497 SetPageChecked(pages[i]);
1250 ret = btrfs_end_transaction(trans, root); 1498 ret = btrfs_end_transaction(trans, root);
1251 BUG_ON(ret); 1499 BUG_ON(ret);
1252 mutex_unlock(&root->fs_info->fs_mutex); 1500 mutex_unlock(&root->fs_info->fs_mutex);
1253 1501
1254 ret = nobh_commit_write(file, pages[i], offset, 1502 ret = btrfs_commit_write(file, pages[i], offset,
1255 offset + this_write); 1503 offset + this_write);
1256 pos += this_write; 1504 pos += this_write;
1257 if (ret) { 1505 if (ret) {
@@ -1275,12 +1523,16 @@ static int drop_extents(struct btrfs_trans_handle *trans,
1275 struct btrfs_leaf *leaf; 1523 struct btrfs_leaf *leaf;
1276 int slot; 1524 int slot;
1277 struct btrfs_file_extent_item *extent; 1525 struct btrfs_file_extent_item *extent;
1278 u64 extent_end; 1526 u64 extent_end = 0;
1279 int keep; 1527 int keep;
1280 struct btrfs_file_extent_item old; 1528 struct btrfs_file_extent_item old;
1281 struct btrfs_path *path; 1529 struct btrfs_path *path;
1282 u64 search_start = start; 1530 u64 search_start = start;
1283 int bookend; 1531 int bookend;
1532 int found_type;
1533 int found_extent;
1534 int found_inline;
1535
1284 path = btrfs_alloc_path(); 1536 path = btrfs_alloc_path();
1285 if (!path) 1537 if (!path)
1286 return -ENOMEM; 1538 return -ENOMEM;
@@ -1292,37 +1544,62 @@ static int drop_extents(struct btrfs_trans_handle *trans,
1292 goto out; 1544 goto out;
1293 if (ret > 0) { 1545 if (ret > 0) {
1294 if (path->slots[0] == 0) { 1546 if (path->slots[0] == 0) {
1295 ret = -ENOENT; 1547 ret = 0;
1296 goto out; 1548 goto out;
1297 } 1549 }
1298 path->slots[0]--; 1550 path->slots[0]--;
1299 } 1551 }
1300 keep = 0; 1552 keep = 0;
1301 bookend = 0; 1553 bookend = 0;
1554 found_extent = 0;
1555 found_inline = 0;
1556 extent = NULL;
1302 leaf = btrfs_buffer_leaf(path->nodes[0]); 1557 leaf = btrfs_buffer_leaf(path->nodes[0]);
1303 slot = path->slots[0]; 1558 slot = path->slots[0];
1304 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); 1559 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1305 extent = btrfs_item_ptr(leaf, slot,
1306 struct btrfs_file_extent_item);
1307 extent_end = key.offset +
1308 (btrfs_file_extent_num_blocks(extent) <<
1309 inode->i_blkbits);
1310 if (key.offset >= end || key.objectid != inode->i_ino) { 1560 if (key.offset >= end || key.objectid != inode->i_ino) {
1311 ret = 0; 1561 ret = 0;
1312 goto out; 1562 goto out;
1313 } 1563 }
1314 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1564 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1565 ret = 0;
1566 goto out;
1567 }
1568 extent = btrfs_item_ptr(leaf, slot,
1569 struct btrfs_file_extent_item);
1570 found_type = btrfs_file_extent_type(extent);
1571 if (found_type == BTRFS_FILE_EXTENT_REG) {
1572 extent_end = key.offset +
1573 (btrfs_file_extent_num_blocks(extent) <<
1574 inode->i_blkbits);
1575 found_extent = 1;
1576 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1577 found_inline = 1;
1578 extent_end = key.offset +
1579 btrfs_file_extent_inline_len(leaf->items + slot);
1580 }
1581
1582 if (!found_extent && !found_inline) {
1583 ret = 0;
1315 goto out; 1584 goto out;
1316 if (search_start >= extent_end) 1585 }
1586
1587 if (search_start >= extent_end) {
1588 ret = 0;
1317 goto out; 1589 goto out;
1590 }
1591
1318 search_start = extent_end; 1592 search_start = extent_end;
1319 1593
1320 if (end < extent_end && end >= key.offset) { 1594 if (end < extent_end && end >= key.offset) {
1321 memcpy(&old, extent, sizeof(old)); 1595 if (found_extent) {
1322 ret = btrfs_inc_extent_ref(trans, root, 1596 memcpy(&old, extent, sizeof(old));
1323 btrfs_file_extent_disk_blocknr(&old), 1597 ret = btrfs_inc_extent_ref(trans, root,
1324 btrfs_file_extent_disk_num_blocks(&old)); 1598 btrfs_file_extent_disk_blocknr(&old),
1325 BUG_ON(ret); 1599 btrfs_file_extent_disk_num_blocks(&old));
1600 BUG_ON(ret);
1601 }
1602 WARN_ON(found_inline);
1326 bookend = 1; 1603 bookend = 1;
1327 } 1604 }
1328 1605
@@ -1332,25 +1609,45 @@ static int drop_extents(struct btrfs_trans_handle *trans,
1332 /* truncate existing extent */ 1609 /* truncate existing extent */
1333 keep = 1; 1610 keep = 1;
1334 WARN_ON(start & (root->blocksize - 1)); 1611 WARN_ON(start & (root->blocksize - 1));
1335 new_num = (start - key.offset) >> inode->i_blkbits; 1612 if (found_extent) {
1336 old_num = btrfs_file_extent_num_blocks(extent); 1613 new_num = (start - key.offset) >>
1337 inode->i_blocks -= (old_num - new_num) << 3; 1614 inode->i_blkbits;
1338 btrfs_set_file_extent_num_blocks(extent, new_num); 1615 old_num = btrfs_file_extent_num_blocks(extent);
1339 mark_buffer_dirty(path->nodes[0]); 1616 inode->i_blocks -= (old_num - new_num) << 3;
1617 btrfs_set_file_extent_num_blocks(extent,
1618 new_num);
1619 mark_buffer_dirty(path->nodes[0]);
1620 } else {
1621 WARN_ON(1);
1622 /*
1623 ret = btrfs_truncate_item(trans, root, path,
1624 start - key.offset);
1625 BUG_ON(ret);
1626 */
1627 }
1340 } 1628 }
1341 if (!keep) { 1629 if (!keep) {
1342 u64 disk_blocknr; 1630 u64 disk_blocknr = 0;
1343 u64 disk_num_blocks; 1631 u64 disk_num_blocks = 0;
1344 disk_blocknr = btrfs_file_extent_disk_blocknr(extent); 1632 u64 extent_num_blocks = 0;
1345 disk_num_blocks = 1633 if (found_extent) {
1346 btrfs_file_extent_disk_num_blocks(extent); 1634 disk_blocknr =
1635 btrfs_file_extent_disk_blocknr(extent);
1636 disk_num_blocks =
1637 btrfs_file_extent_disk_num_blocks(extent);
1638 extent_num_blocks =
1639 btrfs_file_extent_num_blocks(extent);
1640 }
1347 ret = btrfs_del_item(trans, root, path); 1641 ret = btrfs_del_item(trans, root, path);
1348 BUG_ON(ret); 1642 BUG_ON(ret);
1349 inode->i_blocks -=
1350 btrfs_file_extent_num_blocks(extent) << 3;
1351 btrfs_release_path(root, path); 1643 btrfs_release_path(root, path);
1352 ret = btrfs_free_extent(trans, root, disk_blocknr, 1644 if (found_extent) {
1353 disk_num_blocks, 0); 1645 inode->i_blocks -=
1646 btrfs_file_extent_num_blocks(extent) << 3;
1647 ret = btrfs_free_extent(trans, root,
1648 disk_blocknr,
1649 disk_num_blocks, 0);
1650 }
1354 1651
1355 BUG_ON(ret); 1652 BUG_ON(ret);
1356 if (!bookend && search_start >= end) { 1653 if (!bookend && search_start >= end) {
@@ -1360,7 +1657,7 @@ static int drop_extents(struct btrfs_trans_handle *trans,
1360 if (!bookend) 1657 if (!bookend)
1361 continue; 1658 continue;
1362 } 1659 }
1363 if (bookend) { 1660 if (bookend && found_extent) {
1364 /* create bookend */ 1661 /* create bookend */
1365 struct btrfs_key ins; 1662 struct btrfs_key ins;
1366 ins.objectid = inode->i_ino; 1663 ins.objectid = inode->i_ino;
@@ -1390,6 +1687,8 @@ static int drop_extents(struct btrfs_trans_handle *trans,
1390 btrfs_file_extent_num_blocks(&old) - 1687 btrfs_file_extent_num_blocks(&old) -
1391 ((end - key.offset) >> inode->i_blkbits)); 1688 ((end - key.offset) >> inode->i_blkbits));
1392 1689
1690 btrfs_set_file_extent_type(extent,
1691 BTRFS_FILE_EXTENT_REG);
1393 btrfs_set_file_extent_generation(extent, 1692 btrfs_set_file_extent_generation(extent,
1394 btrfs_file_extent_generation(&old)); 1693 btrfs_file_extent_generation(&old));
1395 btrfs_mark_buffer_dirty(path->nodes[0]); 1694 btrfs_mark_buffer_dirty(path->nodes[0]);
@@ -1445,7 +1744,8 @@ static int prepare_pages(struct btrfs_root *root,
1445 if (err) 1744 if (err)
1446 goto failed_truncate; 1745 goto failed_truncate;
1447 bh = bh->b_this_page; 1746 bh = bh->b_this_page;
1448 alloc_extent_start++; 1747 if (alloc_extent_start)
1748 alloc_extent_start++;
1449 } while (bh != head); 1749 } while (bh != head);
1450 pos += this_write; 1750 pos += this_write;
1451 WARN_ON(this_write > write_bytes); 1751 WARN_ON(this_write > write_bytes);
@@ -1543,12 +1843,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1543 start_pos, 1843 start_pos,
1544 (pos + count + root->blocksize -1) & 1844 (pos + count + root->blocksize -1) &
1545 ~((u64)root->blocksize - 1)); 1845 ~((u64)root->blocksize - 1));
1846 BUG_ON(ret);
1546 } 1847 }
1547 ret = btrfs_alloc_extent(trans, root, num_blocks, 1, 1848 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1849 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1850 ret = btrfs_alloc_extent(trans, root, num_blocks, 1,
1548 (u64)-1, &ins); 1851 (u64)-1, &ins);
1549 BUG_ON(ret); 1852 BUG_ON(ret);
1550 ret = btrfs_insert_file_extent(trans, root, inode->i_ino, 1853 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1551 start_pos, ins.objectid, ins.offset); 1854 start_pos, ins.objectid, ins.offset);
1855 BUG_ON(ret);
1856 } else {
1857 ins.offset = 0;
1858 ins.objectid = 0;
1859 }
1552 BUG_ON(ret); 1860 BUG_ON(ret);
1553 alloc_extent_start = ins.objectid; 1861 alloc_extent_start = ins.objectid;
1554 ret = btrfs_end_transaction(trans, root); 1862 ret = btrfs_end_transaction(trans, root);
@@ -1567,7 +1875,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1567 BUG_ON(ret); 1875 BUG_ON(ret);
1568 1876
1569 /* FIXME blocks != pagesize */ 1877 /* FIXME blocks != pagesize */
1570 alloc_extent_start += num_pages; 1878 if (alloc_extent_start)
1879 alloc_extent_start += num_pages;
1571 ret = btrfs_copy_from_user(pos, num_pages, 1880 ret = btrfs_copy_from_user(pos, num_pages,
1572 write_bytes, pages, buf); 1881 write_bytes, pages, buf);
1573 BUG_ON(ret); 1882 BUG_ON(ret);
@@ -1779,10 +2088,11 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1779 if (!PageChecked(page)) { 2088 if (!PageChecked(page)) {
1780 /* FIXME, do it per block */ 2089 /* FIXME, do it per block */
1781 struct btrfs_root *root = BTRFS_I(inode)->root; 2090 struct btrfs_root *root = BTRFS_I(inode)->root;
2091
1782 int ret = btrfs_csum_verify_file_block(root, 2092 int ret = btrfs_csum_verify_file_block(root,
1783 page->mapping->host->i_ino, 2093 page->mapping->host->i_ino,
1784 page->index << PAGE_CACHE_SHIFT, 2094 page->index << PAGE_CACHE_SHIFT,
1785 kmap(page), PAGE_CACHE_SIZE); 2095 kmap(page), PAGE_CACHE_SIZE);
1786 if (ret) { 2096 if (ret) {
1787 printk("failed to verify ino %lu page %lu\n", 2097 printk("failed to verify ino %lu page %lu\n",
1788 page->mapping->host->i_ino, 2098 page->mapping->host->i_ino,
@@ -2249,6 +2559,16 @@ static int btrfs_get_sb(struct file_system_type *fs_type,
2249 btrfs_fill_super, mnt); 2559 btrfs_fill_super, mnt);
2250} 2560}
2251 2561
2562
2563static int btrfs_getattr(struct vfsmount *mnt,
2564 struct dentry *dentry, struct kstat *stat)
2565{
2566 struct inode *inode = dentry->d_inode;
2567 generic_fillattr(inode, stat);
2568 stat->blksize = 256 * 1024;
2569 return 0;
2570}
2571
2252static struct file_system_type btrfs_fs_type = { 2572static struct file_system_type btrfs_fs_type = {
2253 .owner = THIS_MODULE, 2573 .owner = THIS_MODULE,
2254 .name = "btrfs", 2574 .name = "btrfs",
@@ -2298,6 +2618,7 @@ static struct address_space_operations btrfs_aops = {
2298 2618
2299static struct inode_operations btrfs_file_inode_operations = { 2619static struct inode_operations btrfs_file_inode_operations = {
2300 .truncate = btrfs_truncate, 2620 .truncate = btrfs_truncate,
2621 .getattr = btrfs_getattr,
2301}; 2622};
2302 2623
2303static struct file_operations btrfs_file_operations = { 2624static struct file_operations btrfs_file_operations = {