aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/ocfs2/dir.c 197
-rw-r--r-- fs/ocfs2/ocfs2.h 3
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 29
3 files changed, 215 insertions, 14 deletions
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 45e4e03d8f71..1efd0ab680cf 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -84,6 +84,63 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
84 struct buffer_head **new_bh); 84 struct buffer_head **new_bh);
85 85
86/* 86/*
87 * These are distinct checks because future versions of the file system will
88 * want to have a trailing dirent structure independent of indexing.
89 */
90static int ocfs2_dir_has_trailer(struct inode *dir)
91{
92 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
93 return 0;
94
95 return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
96}
97
/*
 * Filesystem-wide counterpart of ocfs2_dir_has_trailer(): returns the
 * result of ocfs2_meta_ecc() for @osb without looking at any particular
 * directory inode.
 */
static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
{
	int wants_trailer = ocfs2_meta_ecc(osb);

	return wants_trailer;
}
102
103static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
104{
105 return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
106}
107
/*
 * Locate the trailer inside the data of buffer head _bh, using the block
 * size of super block _sb to find where the trailer starts.
 */
#define ocfs2_trailer_from_bh(_bh, _sb)					\
	((struct ocfs2_dir_block_trailer *)				\
	 ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
109
110/*
111 * XXX: This is executed once on every dirent. We should consider optimizing
112 * it.
113 */
114static int ocfs2_skip_dir_trailer(struct inode *dir,
115 struct ocfs2_dir_entry *de,
116 unsigned long offset,
117 unsigned long blklen)
118{
119 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
120
121 if (!ocfs2_dir_has_trailer(dir))
122 return 0;
123
124 if (offset != toff)
125 return 0;
126
127 return 1;
128}
129
130static void ocfs2_init_dir_trailer(struct inode *inode,
131 struct buffer_head *bh)
132{
133 struct ocfs2_dir_block_trailer *trailer;
134
135 trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
136 strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
137 trailer->db_compat_rec_len =
138 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
139 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
140 trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
141}
142
143/*
87 * bh passed here can be an inode block or a dir data block, depending 144 * bh passed here can be an inode block or a dir data block, depending
88 * on the inode inline data flag. 145 * on the inode inline data flag.
89 */ 146 */
@@ -232,16 +289,60 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
232{ 289{
233 int rc = 0; 290 int rc = 0;
234 struct buffer_head *tmp = *bh; 291 struct buffer_head *tmp = *bh;
292 struct ocfs2_dir_block_trailer *trailer;
235 293
236 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, 294 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
237 ocfs2_validate_dir_block); 295 ocfs2_validate_dir_block);
238 if (rc) 296 if (rc) {
239 mlog_errno(rc); 297 mlog_errno(rc);
298 goto out;
299 }
300
301 /*
302 * We check the trailer here rather than in
303 * ocfs2_validate_dir_block() because that function doesn't have
304 * the inode to test.
305 */
306 if (!(flags & OCFS2_BH_READAHEAD) &&
307 ocfs2_dir_has_trailer(inode)) {
308 trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
309 if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
310 rc = -EINVAL;
311 ocfs2_error(inode->i_sb,
312 "Invalid dirblock #%llu: "
313 "signature = %.*s\n",
314 (unsigned long long)tmp->b_blocknr, 7,
315 trailer->db_signature);
316 goto out;
317 }
318 if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
319 rc = -EINVAL;
320 ocfs2_error(inode->i_sb,
321 "Directory block #%llu has an invalid "
322 "db_blkno of %llu",
323 (unsigned long long)tmp->b_blocknr,
324 (unsigned long long)le64_to_cpu(trailer->db_blkno));
325 goto out;
326 }
327 if (le64_to_cpu(trailer->db_parent_dinode) !=
328 OCFS2_I(inode)->ip_blkno) {
329 rc = -EINVAL;
330 ocfs2_error(inode->i_sb,
331 "Directory block #%llu on dinode "
332 "#%llu has an invalid parent_dinode "
333 "of %llu",
334 (unsigned long long)tmp->b_blocknr,
335 (unsigned long long)OCFS2_I(inode)->ip_blkno,
336 (unsigned long long)le64_to_cpu(trailer->db_blkno));
337 goto out;
338 }
339 }
240 340
241 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ 341 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
242 if (!rc && !*bh) 342 if (!*bh)
243 *bh = tmp; 343 *bh = tmp;
244 344
345out:
245 return rc ? -EIO : 0; 346 return rc ? -EIO : 0;
246} 347}
247 348
@@ -581,6 +682,16 @@ int __ocfs2_add_entry(handle_t *handle,
581 goto bail; 682 goto bail;
582 } 683 }
583 684
685 /* We're guaranteed that we should have space, so we
686 * can't possibly have hit the trailer...right? */
687 mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
688 "Hit dir trailer trying to insert %.*s "
689 "(namelen %d) into directory %llu. "
690 "offset is %lu, trailer offset is %d\n",
691 namelen, name, namelen,
692 (unsigned long long)parent_fe_bh->b_blocknr,
693 offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
694
584 if (ocfs2_dirent_would_fit(de, rec_len)) { 695 if (ocfs2_dirent_would_fit(de, rec_len)) {
585 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 696 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
586 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 697 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
@@ -622,6 +733,7 @@ int __ocfs2_add_entry(handle_t *handle,
622 retval = 0; 733 retval = 0;
623 goto bail; 734 goto bail;
624 } 735 }
736
625 offset += le16_to_cpu(de->rec_len); 737 offset += le16_to_cpu(de->rec_len);
626 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); 738 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
627 } 739 }
@@ -1059,9 +1171,15 @@ int ocfs2_empty_dir(struct inode *inode)
1059 return !priv.seen_other; 1171 return !priv.seen_other;
1060} 1172}
1061 1173
1062static void ocfs2_fill_initial_dirents(struct inode *inode, 1174/*
1063 struct inode *parent, 1175 * Fills "." and ".." dirents in a new directory block. Returns dirent for
1064 char *start, unsigned int size) 1176 * "..", which might be used during creation of a directory with a trailing
1177 * header. It is otherwise safe to ignore the return code.
1178 */
1179static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
1180 struct inode *parent,
1181 char *start,
1182 unsigned int size)
1065{ 1183{
1066 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start; 1184 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
1067 1185
@@ -1078,6 +1196,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode,
1078 de->name_len = 2; 1196 de->name_len = 2;
1079 strcpy(de->name, ".."); 1197 strcpy(de->name, "..");
1080 ocfs2_set_de_type(de, S_IFDIR); 1198 ocfs2_set_de_type(de, S_IFDIR);
1199
1200 return de;
1081} 1201}
1082 1202
1083/* 1203/*
@@ -1130,10 +1250,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1130 struct ocfs2_alloc_context *data_ac) 1250 struct ocfs2_alloc_context *data_ac)
1131{ 1251{
1132 int status; 1252 int status;
1253 unsigned int size = osb->sb->s_blocksize;
1133 struct buffer_head *new_bh = NULL; 1254 struct buffer_head *new_bh = NULL;
1255 struct ocfs2_dir_entry *de;
1134 1256
1135 mlog_entry_void(); 1257 mlog_entry_void();
1136 1258
1259 if (ocfs2_supports_dir_trailer(osb))
1260 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
1261
1137 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, 1262 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
1138 data_ac, NULL, &new_bh); 1263 data_ac, NULL, &new_bh);
1139 if (status < 0) { 1264 if (status < 0) {
@@ -1151,8 +1276,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1151 } 1276 }
1152 memset(new_bh->b_data, 0, osb->sb->s_blocksize); 1277 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
1153 1278
1154 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, 1279 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
1155 osb->sb->s_blocksize); 1280 if (ocfs2_supports_dir_trailer(osb))
1281 ocfs2_init_dir_trailer(inode, new_bh);
1156 1282
1157 status = ocfs2_journal_dirty(handle, new_bh); 1283 status = ocfs2_journal_dirty(handle, new_bh);
1158 if (status < 0) { 1284 if (status < 0) {
@@ -1193,13 +1319,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
1193 data_ac); 1319 data_ac);
1194} 1320}
1195 1321
1322/*
1323 * Expand rec_len of the rightmost dirent in a directory block so that it
1324 * contains the end of our valid space for dirents. We do this during
1325 * expansion from an inline directory to one with extents. The first dir block
1326 * in that case is taken from the inline data portion of the inode block.
1327 *
1328 * We add the dir trailer if this filesystem wants it.
1329 */
1196static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, 1330static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
1197 unsigned int new_size) 1331 struct super_block *sb)
1198{ 1332{
1199 struct ocfs2_dir_entry *de; 1333 struct ocfs2_dir_entry *de;
1200 struct ocfs2_dir_entry *prev_de; 1334 struct ocfs2_dir_entry *prev_de;
1201 char *de_buf, *limit; 1335 char *de_buf, *limit;
1202 unsigned int bytes = new_size - old_size; 1336 unsigned int new_size = sb->s_blocksize;
1337 unsigned int bytes;
1338
1339 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1340 new_size = ocfs2_dir_trailer_blk_off(sb);
1341
1342 bytes = new_size - old_size;
1203 1343
1204 limit = start + old_size; 1344 limit = start + old_size;
1205 de_buf = start; 1345 de_buf = start;
@@ -1316,8 +1456,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1316 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); 1456 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
1317 memset(dirdata_bh->b_data + i_size_read(dir), 0, 1457 memset(dirdata_bh->b_data + i_size_read(dir), 0,
1318 sb->s_blocksize - i_size_read(dir)); 1458 sb->s_blocksize - i_size_read(dir));
1319 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), 1459 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
1320 sb->s_blocksize); 1460 if (ocfs2_supports_dir_trailer(osb))
1461 ocfs2_init_dir_trailer(dir, dirdata_bh);
1321 1462
1322 ret = ocfs2_journal_dirty(handle, dirdata_bh); 1463 ret = ocfs2_journal_dirty(handle, dirdata_bh);
1323 if (ret) { 1464 if (ret) {
@@ -1604,9 +1745,15 @@ do_extend:
1604 goto bail; 1745 goto bail;
1605 } 1746 }
1606 memset(new_bh->b_data, 0, sb->s_blocksize); 1747 memset(new_bh->b_data, 0, sb->s_blocksize);
1748
1607 de = (struct ocfs2_dir_entry *) new_bh->b_data; 1749 de = (struct ocfs2_dir_entry *) new_bh->b_data;
1608 de->inode = 0; 1750 de->inode = 0;
1609 de->rec_len = cpu_to_le16(sb->s_blocksize); 1751 if (ocfs2_dir_has_trailer(dir)) {
1752 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
1753 ocfs2_init_dir_trailer(dir, new_bh);
1754 } else {
1755 de->rec_len = cpu_to_le16(sb->s_blocksize);
1756 }
1610 status = ocfs2_journal_dirty(handle, new_bh); 1757 status = ocfs2_journal_dirty(handle, new_bh);
1611 if (status < 0) { 1758 if (status < 0) {
1612 mlog_errno(status); 1759 mlog_errno(status);
@@ -1648,11 +1795,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1648 unsigned int *blocks_wanted) 1795 unsigned int *blocks_wanted)
1649{ 1796{
1650 int ret; 1797 int ret;
1798 struct super_block *sb = dir->i_sb;
1651 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1799 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1652 struct ocfs2_dir_entry *de, *last_de = NULL; 1800 struct ocfs2_dir_entry *de, *last_de = NULL;
1653 char *de_buf, *limit; 1801 char *de_buf, *limit;
1654 unsigned long offset = 0; 1802 unsigned long offset = 0;
1655 unsigned int rec_len, new_rec_len; 1803 unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
1804
1805 /*
1806 * This calculates how many free bytes we'd have in block zero, should
1807 * this function force expansion to an extent tree.
1808 */
1809 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1810 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
1811 else
1812 free_space = dir->i_sb->s_blocksize - i_size_read(dir);
1656 1813
1657 de_buf = di->id2.i_data.id_data; 1814 de_buf = di->id2.i_data.id_data;
1658 limit = de_buf + i_size_read(dir); 1815 limit = de_buf + i_size_read(dir);
@@ -1669,6 +1826,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1669 ret = -EEXIST; 1826 ret = -EEXIST;
1670 goto out; 1827 goto out;
1671 } 1828 }
1829 /*
1830 * No need to check for a trailing dirent record here as
1831 * they're not used for inline dirs.
1832 */
1833
1672 if (ocfs2_dirent_would_fit(de, rec_len)) { 1834 if (ocfs2_dirent_would_fit(de, rec_len)) {
1673 /* Ok, we found a spot. Return this bh and let 1835 /* Ok, we found a spot. Return this bh and let
1674 * the caller actually fill it in. */ 1836 * the caller actually fill it in. */
@@ -1689,7 +1851,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1689 * dirent can be found. 1851 * dirent can be found.
1690 */ 1852 */
1691 *blocks_wanted = 1; 1853 *blocks_wanted = 1;
1692 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir)); 1854 new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
1693 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len))) 1855 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
1694 *blocks_wanted = 2; 1856 *blocks_wanted = 2;
1695 1857
@@ -1707,6 +1869,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1707 struct ocfs2_dir_entry *de; 1869 struct ocfs2_dir_entry *de;
1708 struct super_block *sb = dir->i_sb; 1870 struct super_block *sb = dir->i_sb;
1709 int status; 1871 int status;
1872 int blocksize = dir->i_sb->s_blocksize;
1710 1873
1711 status = ocfs2_read_dir_block(dir, 0, &bh, 0); 1874 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
1712 if (status) { 1875 if (status) {
@@ -1748,6 +1911,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1748 status = -EEXIST; 1911 status = -EEXIST;
1749 goto bail; 1912 goto bail;
1750 } 1913 }
1914
1915 if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
1916 blocksize))
1917 goto next;
1918
1751 if (ocfs2_dirent_would_fit(de, rec_len)) { 1919 if (ocfs2_dirent_would_fit(de, rec_len)) {
1752 /* Ok, we found a spot. Return this bh and let 1920 /* Ok, we found a spot. Return this bh and let
1753 * the caller actually fill it in. */ 1921 * the caller actually fill it in. */
@@ -1756,6 +1924,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1756 status = 0; 1924 status = 0;
1757 goto bail; 1925 goto bail;
1758 } 1926 }
1927next:
1759 offset += le16_to_cpu(de->rec_len); 1928 offset += le16_to_cpu(de->rec_len);
1760 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 1929 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
1761 } 1930 }
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bad87d0a03c9..ad5c24a29edd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -470,6 +470,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
470#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ 470#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
471 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) 471 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
472 472
/*
 * True when the db_signature field of the trailer pointed to by ptr
 * compares equal, as a C string, to OCFS2_DIR_TRAILER_SIGNATURE.
 */
#define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
	(strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE) == 0)
475
473static inline unsigned long ino_from_blkno(struct super_block *sb, 476static inline unsigned long ino_from_blkno(struct super_block *sb,
474 u64 blkno) 477 u64 blkno)
475{ 478{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 290fa26fba6e..af0013b9c17f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -65,6 +65,7 @@
65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" 65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" 66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" 67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
68 69
69/* Compatibility flags */ 70/* Compatibility flags */
70#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 71#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -752,6 +753,34 @@ struct ocfs2_dir_entry {
752} __attribute__ ((packed)); 753} __attribute__ ((packed));
753 754
754/* 755/*
756 * Per-block record for the unindexed directory btree. This is carefully
757 * crafted so that the rec_len and name_len records of an ocfs2_dir_entry are
758 * mirrored. That way, the directory manipulation code needs a minimal amount
759 * of update.
760 *
761 * NOTE: Keep this structure aligned to a multiple of 4 bytes.
762 */
763struct ocfs2_dir_block_trailer {
764/*00*/ __le64 db_compat_inode; /* Always zero. Was inode */
765
766 __le16 db_compat_rec_len; /* Backwards compatible with
767 * ocfs2_dir_entry. */
768 __u8 db_compat_name_len; /* Always zero. Was name_len */
769 __u8 db_reserved0;
770 __le16 db_reserved1;
771 __le16 db_free_rec_len; /* Size of largest empty hole
772 * in this block. (unused) */
773/*10*/ __u8 db_signature[8]; /* Signature for verification */
774 __le64 db_reserved2;
775 __le64 db_free_next; /* Next block in list (unused) */
776/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */
777 __le64 db_parent_dinode; /* dinode which owns me, in
778 blocks */
779/*30*/ __le64 db_check; /* Error checking */
780/*40*/
781};
782
783/*
755 * On disk allocator group structure for OCFS2 784 * On disk allocator group structure for OCFS2
756 */ 785 */
757struct ocfs2_group_desc 786struct ocfs2_group_desc