diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2006-09-22 20:28:19 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-09-24 16:50:46 -0400 |
commit | 24c19ef40474c3930597f31ae233dc06319bd881 (patch) | |
tree | e05b1cf72435d25bf47e67b206aa376bbea33b7d /fs/ocfs2/dlmglue.c | |
parent | f9e2d82e6395cfa0802446b54b63cc412089d82c (diff) |
ocfs2: Remove i_generation from inode lock names
OCFS2 puts inode meta data in the "lock value block" provided by the DLM.
Typically, i_generation is encoded in the lock name so that a deleted inode
and a new one in the same block don't share the same lvb.
Unfortunately, that scheme means that the read in ocfs2_read_locked_inode()
is potentially thrown away as soon as the meta data lock is taken - we
cannot encode the lock name without first knowing i_generation, which
requires a disk read.
This patch encodes i_generation in the inode meta data lvb, and removes the
value from the inode meta data lock name. This way, the read can be covered
by a lock, and at the same time we can distinguish between an up to date and
a stale LVB.
This will help cold-cache stat(2) performance in particular.
Since this patch changes the protocol version, we take the opportunity to do
a minor re-organization of two of the LVB fields.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlmglue.c')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 42 |
1 files changed, 36 insertions, 6 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6cd84dffbbf4..ecb3cba22814 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -320,6 +320,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | |||
320 | 320 | ||
321 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 321 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
322 | enum ocfs2_lock_type type, | 322 | enum ocfs2_lock_type type, |
323 | unsigned int generation, | ||
323 | struct inode *inode) | 324 | struct inode *inode) |
324 | { | 325 | { |
325 | struct ocfs2_lock_res_ops *ops; | 326 | struct ocfs2_lock_res_ops *ops; |
@@ -341,7 +342,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
341 | }; | 342 | }; |
342 | 343 | ||
343 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, | 344 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, |
344 | inode->i_generation, res->l_name); | 345 | generation, res->l_name); |
345 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); | 346 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); |
346 | } | 347 | } |
347 | 348 | ||
@@ -1173,17 +1174,19 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1173 | 1174 | ||
1174 | int ocfs2_create_new_lock(struct ocfs2_super *osb, | 1175 | int ocfs2_create_new_lock(struct ocfs2_super *osb, |
1175 | struct ocfs2_lock_res *lockres, | 1176 | struct ocfs2_lock_res *lockres, |
1176 | int ex) | 1177 | int ex, |
1178 | int local) | ||
1177 | { | 1179 | { |
1178 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 1180 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1179 | unsigned long flags; | 1181 | unsigned long flags; |
1182 | int lkm_flags = local ? LKM_LOCAL : 0; | ||
1180 | 1183 | ||
1181 | spin_lock_irqsave(&lockres->l_lock, flags); | 1184 | spin_lock_irqsave(&lockres->l_lock, flags); |
1182 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1185 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
1183 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 1186 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); |
1184 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1187 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1185 | 1188 | ||
1186 | return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL); | 1189 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); |
1187 | } | 1190 | } |
1188 | 1191 | ||
1189 | /* Grants us an EX lock on the data and metadata resources, skipping | 1192 | /* Grants us an EX lock on the data and metadata resources, skipping |
@@ -1212,19 +1215,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1212 | * on a resource which has an invalid one -- we'll set it | 1215 | * on a resource which has an invalid one -- we'll set it |
1213 | * valid when we release the EX. */ | 1216 | * valid when we release the EX. */ |
1214 | 1217 | ||
1215 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1); | 1218 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); |
1216 | if (ret) { | 1219 | if (ret) { |
1217 | mlog_errno(ret); | 1220 | mlog_errno(ret); |
1218 | goto bail; | 1221 | goto bail; |
1219 | } | 1222 | } |
1220 | 1223 | ||
1221 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1); | 1224 | /* |
1225 | * We don't want to use LKM_LOCAL on a meta data lock as they | ||
1226 | * don't use a generation in their lock names. | ||
1227 | */ | ||
1228 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | ||
1222 | if (ret) { | 1229 | if (ret) { |
1223 | mlog_errno(ret); | 1230 | mlog_errno(ret); |
1224 | goto bail; | 1231 | goto bail; |
1225 | } | 1232 | } |
1226 | 1233 | ||
1227 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1); | 1234 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); |
1228 | if (ret) { | 1235 | if (ret) { |
1229 | mlog_errno(ret); | 1236 | mlog_errno(ret); |
1230 | goto bail; | 1237 | goto bail; |
@@ -1413,6 +1420,16 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
1413 | 1420 | ||
1414 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1421 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; |
1415 | 1422 | ||
1423 | /* | ||
1424 | * Invalidate the LVB of a deleted inode - this way other | ||
1425 | * nodes are forced to go to disk and discover the new inode | ||
1426 | * status. | ||
1427 | */ | ||
1428 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | ||
1429 | lvb->lvb_version = 0; | ||
1430 | goto out; | ||
1431 | } | ||
1432 | |||
1416 | lvb->lvb_version = OCFS2_LVB_VERSION; | 1433 | lvb->lvb_version = OCFS2_LVB_VERSION; |
1417 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); | 1434 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); |
1418 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 1435 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); |
@@ -1429,6 +1446,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
1429 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); | 1446 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); |
1430 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 1447 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); |
1431 | 1448 | ||
1449 | out: | ||
1432 | mlog_meta_lvb(0, lockres); | 1450 | mlog_meta_lvb(0, lockres); |
1433 | 1451 | ||
1434 | mlog_exit_void(); | 1452 | mlog_exit_void(); |
@@ -1727,6 +1745,18 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1727 | wait_event(osb->recovery_event, | 1745 | wait_event(osb->recovery_event, |
1728 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1746 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1729 | 1747 | ||
1748 | /* | ||
1749 | * We only see this flag if we're being called from | ||
1750 | * ocfs2_read_locked_inode(). It means we're locking an inode | ||
1751 | * which hasn't been populated yet, so clear the refresh flag | ||
1752 | * and let the caller handle it. | ||
1753 | */ | ||
1754 | if (inode->i_state & I_NEW) { | ||
1755 | status = 0; | ||
1756 | ocfs2_complete_lock_res_refresh(lockres, 0); | ||
1757 | goto bail; | ||
1758 | } | ||
1759 | |||
1730 | /* This is fun. The caller may want a bh back, or it may | 1760 | /* This is fun. The caller may want a bh back, or it may |
1731 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1761 | * not. ocfs2_meta_lock_update definitely wants one in, but |
1732 | * may or may not read one, depending on what's in the | 1762 | * may or may not read one, depending on what's in the |