diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2011-06-15 05:29:37 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2011-07-15 04:31:48 -0400 |
commit | 17d539f0499fa2c0321b7c260831cca2bb36d119 (patch) | |
tree | 28b960ad670a0112d45afbab120f855b8e95b146 /fs/gfs2 | |
parent | 5dcd07b9f39ca3e9be5bcc387d193fc0674e1c81 (diff) |
GFS2: Cache dir hash table in a contiguous buffer
This patch adds a cache for the hash table to the directory code
in order to help simplify the way in which the hash table is
accessed. This is intended to be a first step towards introducing
some performance improvements in the directory code.
There are two follow-ups that I'm hoping to see fairly shortly. One
is to simplify the hash table reading code now that we always read the
complete hash table, whether we want one entry or all of them. The
other is to introduce readahead on the heads of the hash chains
which are referred to from the table.
The hash table is a maximum of 128k in size, so it is not worth trying
to read it in small chunks.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/dir.c | 221 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 1 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 2 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 1 | ||||
-rw-r--r-- | fs/gfs2/main.c | 1 | ||||
-rw-r--r-- | fs/gfs2/super.c | 2 |
6 files changed, 115 insertions, 113 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 091ee4779538..1cc2f8ec52a2 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -339,6 +339,67 @@ fail: | |||
339 | return (copied) ? copied : error; | 339 | return (copied) ? copied : error; |
340 | } | 340 | } |
341 | 341 | ||
342 | /** | ||
343 | * gfs2_dir_get_hash_table - Get pointer to the dir hash table | ||
344 | * @ip: The inode in question | ||
345 | * | ||
346 | * Returns: The hash table or an error | ||
347 | */ | ||
348 | |||
349 | static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) | ||
350 | { | ||
351 | struct inode *inode = &ip->i_inode; | ||
352 | int ret; | ||
353 | u32 hsize; | ||
354 | __be64 *hc; | ||
355 | |||
356 | BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); | ||
357 | |||
358 | hc = ip->i_hash_cache; | ||
359 | if (hc) | ||
360 | return hc; | ||
361 | |||
362 | hsize = 1 << ip->i_depth; | ||
363 | hsize *= sizeof(__be64); | ||
364 | if (hsize != i_size_read(&ip->i_inode)) { | ||
365 | gfs2_consist_inode(ip); | ||
366 | return ERR_PTR(-EIO); | ||
367 | } | ||
368 | |||
369 | hc = kmalloc(hsize, GFP_NOFS); | ||
370 | ret = -ENOMEM; | ||
371 | if (hc == NULL) | ||
372 | return ERR_PTR(-ENOMEM); | ||
373 | |||
374 | ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1); | ||
375 | if (ret < 0) { | ||
376 | kfree(hc); | ||
377 | return ERR_PTR(ret); | ||
378 | } | ||
379 | |||
380 | spin_lock(&inode->i_lock); | ||
381 | if (ip->i_hash_cache) | ||
382 | kfree(hc); | ||
383 | else | ||
384 | ip->i_hash_cache = hc; | ||
385 | spin_unlock(&inode->i_lock); | ||
386 | |||
387 | return ip->i_hash_cache; | ||
388 | } | ||
389 | |||
390 | /** | ||
391 | * gfs2_dir_hash_inval - Invalidate dir hash | ||
392 | * @ip: The directory inode | ||
393 | * | ||
394 | * Must be called with an exclusive glock, or during glock invalidation. | ||
395 | */ | ||
396 | void gfs2_dir_hash_inval(struct gfs2_inode *ip) | ||
397 | { | ||
398 | __be64 *hc = ip->i_hash_cache; | ||
399 | ip->i_hash_cache = NULL; | ||
400 | kfree(hc); | ||
401 | } | ||
402 | |||
342 | static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) | 403 | static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) |
343 | { | 404 | { |
344 | return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; | 405 | return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; |
@@ -686,17 +747,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, | |||
686 | static int get_leaf_nr(struct gfs2_inode *dip, u32 index, | 747 | static int get_leaf_nr(struct gfs2_inode *dip, u32 index, |
687 | u64 *leaf_out) | 748 | u64 *leaf_out) |
688 | { | 749 | { |
689 | __be64 leaf_no; | 750 | __be64 *hash; |
690 | int error; | ||
691 | |||
692 | error = gfs2_dir_read_data(dip, (char *)&leaf_no, | ||
693 | index * sizeof(__be64), | ||
694 | sizeof(__be64), 0); | ||
695 | if (error != sizeof(u64)) | ||
696 | return (error < 0) ? error : -EIO; | ||
697 | |||
698 | *leaf_out = be64_to_cpu(leaf_no); | ||
699 | 751 | ||
752 | hash = gfs2_dir_get_hash_table(dip); | ||
753 | if (IS_ERR(hash)) | ||
754 | return PTR_ERR(hash); | ||
755 | *leaf_out = be64_to_cpu(*(hash + index)); | ||
700 | return 0; | 756 | return 0; |
701 | } | 757 | } |
702 | 758 | ||
@@ -966,6 +1022,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
966 | for (x = 0; x < half_len; x++) | 1022 | for (x = 0; x < half_len; x++) |
967 | lp[x] = cpu_to_be64(bn); | 1023 | lp[x] = cpu_to_be64(bn); |
968 | 1024 | ||
1025 | gfs2_dir_hash_inval(dip); | ||
1026 | |||
969 | error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), | 1027 | error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), |
970 | half_len * sizeof(u64)); | 1028 | half_len * sizeof(u64)); |
971 | if (error != half_len * sizeof(u64)) { | 1029 | if (error != half_len * sizeof(u64)) { |
@@ -1052,70 +1110,54 @@ fail_brelse: | |||
1052 | 1110 | ||
1053 | static int dir_double_exhash(struct gfs2_inode *dip) | 1111 | static int dir_double_exhash(struct gfs2_inode *dip) |
1054 | { | 1112 | { |
1055 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | ||
1056 | struct buffer_head *dibh; | 1113 | struct buffer_head *dibh; |
1057 | u32 hsize; | 1114 | u32 hsize; |
1058 | u64 *buf; | 1115 | u32 hsize_bytes; |
1059 | u64 *from, *to; | 1116 | __be64 *hc; |
1060 | u64 block; | 1117 | __be64 *hc2, *h; |
1061 | u64 disksize = i_size_read(&dip->i_inode); | ||
1062 | int x; | 1118 | int x; |
1063 | int error = 0; | 1119 | int error = 0; |
1064 | 1120 | ||
1065 | hsize = 1 << dip->i_depth; | 1121 | hsize = 1 << dip->i_depth; |
1066 | if (hsize * sizeof(u64) != disksize) { | 1122 | hsize_bytes = hsize * sizeof(__be64); |
1067 | gfs2_consist_inode(dip); | ||
1068 | return -EIO; | ||
1069 | } | ||
1070 | 1123 | ||
1071 | /* Allocate both the "from" and "to" buffers in one big chunk */ | 1124 | hc = gfs2_dir_get_hash_table(dip); |
1125 | if (IS_ERR(hc)) | ||
1126 | return PTR_ERR(hc); | ||
1072 | 1127 | ||
1073 | buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); | 1128 | h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); |
1074 | if (!buf) | 1129 | if (!hc2) |
1075 | return -ENOMEM; | 1130 | return -ENOMEM; |
1076 | 1131 | ||
1077 | for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { | 1132 | error = gfs2_meta_inode_buffer(dip, &dibh); |
1078 | error = gfs2_dir_read_data(dip, (char *)buf, | 1133 | if (error) |
1079 | block * sdp->sd_hash_bsize, | 1134 | goto out_kfree; |
1080 | sdp->sd_hash_bsize, 1); | ||
1081 | if (error != sdp->sd_hash_bsize) { | ||
1082 | if (error >= 0) | ||
1083 | error = -EIO; | ||
1084 | goto fail; | ||
1085 | } | ||
1086 | |||
1087 | from = buf; | ||
1088 | to = (u64 *)((char *)buf + sdp->sd_hash_bsize); | ||
1089 | |||
1090 | for (x = sdp->sd_hash_ptrs; x--; from++) { | ||
1091 | *to++ = *from; /* No endianess worries */ | ||
1092 | *to++ = *from; | ||
1093 | } | ||
1094 | 1135 | ||
1095 | error = gfs2_dir_write_data(dip, | 1136 | for (x = 0; x < hsize; x++) { |
1096 | (char *)buf + sdp->sd_hash_bsize, | 1137 | *h++ = *hc; |
1097 | block * sdp->sd_sb.sb_bsize, | 1138 | *h++ = *hc; |
1098 | sdp->sd_sb.sb_bsize); | 1139 | hc++; |
1099 | if (error != sdp->sd_sb.sb_bsize) { | ||
1100 | if (error >= 0) | ||
1101 | error = -EIO; | ||
1102 | goto fail; | ||
1103 | } | ||
1104 | } | 1140 | } |
1105 | 1141 | ||
1106 | kfree(buf); | 1142 | error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); |
1107 | 1143 | if (error != (hsize_bytes * 2)) | |
1108 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1144 | goto fail; |
1109 | if (!gfs2_assert_withdraw(sdp, !error)) { | ||
1110 | dip->i_depth++; | ||
1111 | gfs2_dinode_out(dip, dibh->b_data); | ||
1112 | brelse(dibh); | ||
1113 | } | ||
1114 | 1145 | ||
1115 | return error; | 1146 | gfs2_dir_hash_inval(dip); |
1147 | dip->i_hash_cache = hc2; | ||
1148 | dip->i_depth++; | ||
1149 | gfs2_dinode_out(dip, dibh->b_data); | ||
1150 | brelse(dibh); | ||
1151 | return 0; | ||
1116 | 1152 | ||
1117 | fail: | 1153 | fail: |
1118 | kfree(buf); | 1154 | /* Replace original hash table & size */ |
1155 | gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); | ||
1156 | i_size_write(&dip->i_inode, hsize_bytes); | ||
1157 | gfs2_dinode_out(dip, dibh->b_data); | ||
1158 | brelse(dibh); | ||
1159 | out_kfree: | ||
1160 | kfree(hc2); | ||
1119 | return error; | 1161 | return error; |
1120 | } | 1162 | } |
1121 | 1163 | ||
@@ -1348,6 +1390,7 @@ out: | |||
1348 | return error; | 1390 | return error; |
1349 | } | 1391 | } |
1350 | 1392 | ||
1393 | |||
1351 | /** | 1394 | /** |
1352 | * dir_e_read - Reads the entries from a directory into a filldir buffer | 1395 | * dir_e_read - Reads the entries from a directory into a filldir buffer |
1353 | * @dip: dinode pointer | 1396 | * @dip: dinode pointer |
@@ -1362,9 +1405,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1362 | filldir_t filldir) | 1405 | filldir_t filldir) |
1363 | { | 1406 | { |
1364 | struct gfs2_inode *dip = GFS2_I(inode); | 1407 | struct gfs2_inode *dip = GFS2_I(inode); |
1365 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1366 | u32 hsize, len = 0; | 1408 | u32 hsize, len = 0; |
1367 | u32 ht_offset, lp_offset, ht_offset_cur = -1; | ||
1368 | u32 hash, index; | 1409 | u32 hash, index; |
1369 | __be64 *lp; | 1410 | __be64 *lp; |
1370 | int copied = 0; | 1411 | int copied = 0; |
@@ -1372,37 +1413,17 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1372 | unsigned depth = 0; | 1413 | unsigned depth = 0; |
1373 | 1414 | ||
1374 | hsize = 1 << dip->i_depth; | 1415 | hsize = 1 << dip->i_depth; |
1375 | if (hsize * sizeof(u64) != i_size_read(inode)) { | ||
1376 | gfs2_consist_inode(dip); | ||
1377 | return -EIO; | ||
1378 | } | ||
1379 | |||
1380 | hash = gfs2_dir_offset2hash(*offset); | 1416 | hash = gfs2_dir_offset2hash(*offset); |
1381 | index = hash >> (32 - dip->i_depth); | 1417 | index = hash >> (32 - dip->i_depth); |
1382 | 1418 | ||
1383 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); | 1419 | lp = gfs2_dir_get_hash_table(dip); |
1384 | if (!lp) | 1420 | if (IS_ERR(lp)) |
1385 | return -ENOMEM; | 1421 | return PTR_ERR(lp); |
1386 | 1422 | ||
1387 | while (index < hsize) { | 1423 | while (index < hsize) { |
1388 | lp_offset = index & (sdp->sd_hash_ptrs - 1); | ||
1389 | ht_offset = index - lp_offset; | ||
1390 | |||
1391 | if (ht_offset_cur != ht_offset) { | ||
1392 | error = gfs2_dir_read_data(dip, (char *)lp, | ||
1393 | ht_offset * sizeof(__be64), | ||
1394 | sdp->sd_hash_bsize, 1); | ||
1395 | if (error != sdp->sd_hash_bsize) { | ||
1396 | if (error >= 0) | ||
1397 | error = -EIO; | ||
1398 | goto out; | ||
1399 | } | ||
1400 | ht_offset_cur = ht_offset; | ||
1401 | } | ||
1402 | |||
1403 | error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, | 1424 | error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, |
1404 | &copied, &depth, | 1425 | &copied, &depth, |
1405 | be64_to_cpu(lp[lp_offset])); | 1426 | be64_to_cpu(lp[index])); |
1406 | if (error) | 1427 | if (error) |
1407 | break; | 1428 | break; |
1408 | 1429 | ||
@@ -1410,8 +1431,6 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1410 | index = (index & ~(len - 1)) + len; | 1431 | index = (index & ~(len - 1)) + len; |
1411 | } | 1432 | } |
1412 | 1433 | ||
1413 | out: | ||
1414 | kfree(lp); | ||
1415 | if (error > 0) | 1434 | if (error > 0) |
1416 | error = 0; | 1435 | error = 0; |
1417 | return error; | 1436 | return error; |
@@ -1914,43 +1933,22 @@ out: | |||
1914 | 1933 | ||
1915 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) | 1934 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) |
1916 | { | 1935 | { |
1917 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | ||
1918 | struct buffer_head *bh; | 1936 | struct buffer_head *bh; |
1919 | struct gfs2_leaf *leaf; | 1937 | struct gfs2_leaf *leaf; |
1920 | u32 hsize, len; | 1938 | u32 hsize, len; |
1921 | u32 ht_offset, lp_offset, ht_offset_cur = -1; | ||
1922 | u32 index = 0, next_index; | 1939 | u32 index = 0, next_index; |
1923 | __be64 *lp; | 1940 | __be64 *lp; |
1924 | u64 leaf_no; | 1941 | u64 leaf_no; |
1925 | int error = 0, last; | 1942 | int error = 0, last; |
1926 | 1943 | ||
1927 | hsize = 1 << dip->i_depth; | 1944 | hsize = 1 << dip->i_depth; |
1928 | if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { | ||
1929 | gfs2_consist_inode(dip); | ||
1930 | return -EIO; | ||
1931 | } | ||
1932 | 1945 | ||
1933 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); | 1946 | lp = gfs2_dir_get_hash_table(dip); |
1934 | if (!lp) | 1947 | if (IS_ERR(lp)) |
1935 | return -ENOMEM; | 1948 | return PTR_ERR(lp); |
1936 | 1949 | ||
1937 | while (index < hsize) { | 1950 | while (index < hsize) { |
1938 | lp_offset = index & (sdp->sd_hash_ptrs - 1); | 1951 | leaf_no = be64_to_cpu(lp[index]); |
1939 | ht_offset = index - lp_offset; | ||
1940 | |||
1941 | if (ht_offset_cur != ht_offset) { | ||
1942 | error = gfs2_dir_read_data(dip, (char *)lp, | ||
1943 | ht_offset * sizeof(__be64), | ||
1944 | sdp->sd_hash_bsize, 1); | ||
1945 | if (error != sdp->sd_hash_bsize) { | ||
1946 | if (error >= 0) | ||
1947 | error = -EIO; | ||
1948 | goto out; | ||
1949 | } | ||
1950 | ht_offset_cur = ht_offset; | ||
1951 | } | ||
1952 | |||
1953 | leaf_no = be64_to_cpu(lp[lp_offset]); | ||
1954 | if (leaf_no) { | 1952 | if (leaf_no) { |
1955 | error = get_leaf(dip, leaf_no, &bh); | 1953 | error = get_leaf(dip, leaf_no, &bh); |
1956 | if (error) | 1954 | if (error) |
@@ -1976,7 +1974,6 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) | |||
1976 | } | 1974 | } |
1977 | 1975 | ||
1978 | out: | 1976 | out: |
1979 | kfree(lp); | ||
1980 | 1977 | ||
1981 | return error; | 1978 | return error; |
1982 | } | 1979 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index e686af11becd..ff5772fbf024 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -35,6 +35,7 @@ extern int gfs2_diradd_alloc_required(struct inode *dir, | |||
35 | const struct qstr *filename); | 35 | const struct qstr *filename); |
36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
37 | struct buffer_head **bhp); | 37 | struct buffer_head **bhp); |
38 | extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); | ||
38 | 39 | ||
39 | static inline u32 gfs2_disk_hash(const char *data, int len) | 40 | static inline u32 gfs2_disk_hash(const char *data, int len) |
40 | { | 41 | { |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 2cca29316bd6..95788ae436c6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "rgrp.h" | 26 | #include "rgrp.h" |
27 | #include "util.h" | 27 | #include "util.h" |
28 | #include "trans.h" | 28 | #include "trans.h" |
29 | #include "dir.h" | ||
29 | 30 | ||
30 | /** | 31 | /** |
31 | * __gfs2_ail_flush - remove all buffers for a given lock from the AIL | 32 | * __gfs2_ail_flush - remove all buffers for a given lock from the AIL |
@@ -218,6 +219,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) | |||
218 | if (ip) { | 219 | if (ip) { |
219 | set_bit(GIF_INVALID, &ip->i_flags); | 220 | set_bit(GIF_INVALID, &ip->i_flags); |
220 | forget_all_cached_acls(&ip->i_inode); | 221 | forget_all_cached_acls(&ip->i_inode); |
222 | gfs2_dir_hash_inval(ip); | ||
221 | } | 223 | } |
222 | } | 224 | } |
223 | 225 | ||
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 81206e70cbf6..24cd55f60e60 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -285,6 +285,7 @@ struct gfs2_inode { | |||
285 | u64 i_goal; /* goal block for allocations */ | 285 | u64 i_goal; /* goal block for allocations */ |
286 | struct rw_semaphore i_rw_mutex; | 286 | struct rw_semaphore i_rw_mutex; |
287 | struct list_head i_trunc_list; | 287 | struct list_head i_trunc_list; |
288 | __be64 *i_hash_cache; | ||
288 | u32 i_entries; | 289 | u32 i_entries; |
289 | u32 i_diskflags; | 290 | u32 i_diskflags; |
290 | u8 i_height; | 291 | u8 i_height; |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c2b34cd2abe0..29e1ace7953d 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo) | |||
41 | init_rwsem(&ip->i_rw_mutex); | 41 | init_rwsem(&ip->i_rw_mutex); |
42 | INIT_LIST_HEAD(&ip->i_trunc_list); | 42 | INIT_LIST_HEAD(&ip->i_trunc_list); |
43 | ip->i_alloc = NULL; | 43 | ip->i_alloc = NULL; |
44 | ip->i_hash_cache = NULL; | ||
44 | } | 45 | } |
45 | 46 | ||
46 | static void gfs2_init_glock_once(void *foo) | 47 | static void gfs2_init_glock_once(void *foo) |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fb0edf735483..b7beadd9ba4c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1533,7 +1533,7 @@ out: | |||
1533 | /* Case 3 starts here */ | 1533 | /* Case 3 starts here */ |
1534 | truncate_inode_pages(&inode->i_data, 0); | 1534 | truncate_inode_pages(&inode->i_data, 0); |
1535 | end_writeback(inode); | 1535 | end_writeback(inode); |
1536 | 1536 | gfs2_dir_hash_inval(ip); | |
1537 | ip->i_gl->gl_object = NULL; | 1537 | ip->i_gl->gl_object = NULL; |
1538 | gfs2_glock_add_to_lru(ip->i_gl); | 1538 | gfs2_glock_add_to_lru(ip->i_gl); |
1539 | gfs2_glock_put(ip->i_gl); | 1539 | gfs2_glock_put(ip->i_gl); |