aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ufs
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2015-06-17 12:02:56 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2015-07-06 17:39:25 -0400
commit724bb09fdc06d4ff03757b25d6dba9ef1b133e8f (patch)
tree5ef4441867f681e132d820c78948b34c0f0ca1df /fs/ufs
parent4af7b2c080715b9452fdaefb7ada72b4dc79593e (diff)
ufs: don't use lock_ufs() for block pointers tree protection
* stores to block pointers are under per-inode seqlock (meta_lock) and mutex (truncate_mutex) * fetches of block pointers are either under truncate_mutex, or wrapped into seqretry loop on meta_lock * all changes of ->i_size are under truncate_mutex and i_mutex * all changes of ->i_lastfrag are under truncate_mutex It's similar to what ext2 is doing; the main difference is that unlike ext2 we can't rely upon the atomicity of stores into block pointers - on UFS2 they are 64bit. So we can't cut the corner when switching a pointer from NULL to non-NULL as we could in ext2_splice_branch() and need to use meta_lock on all modifications. We use seqlock where ext2 uses rwlock; ext2 could probably also benefit from such change... Another non-trivial difference is that with UFS we *cannot* have a reader grab truncate_mutex in case of race - it has to keep retrying. That might be possible to change, but not until we lift tail unpacking several levels up in the call chain. After that commit we do *NOT* hold fs-wide serialization on accesses to block pointers anymore. Moreover, lock_ufs() can become a normal mutex now - it's only used on statfs, remount and sync_fs and none of those uses are recursive. As a matter of fact, *now* it can be collapsed with ->s_lock, and be eventually replaced with saner per-cylinder-group spinlocks, but that's a separate story. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ufs')
-rw-r--r--fs/ufs/balloc.c4
-rw-r--r--fs/ufs/inode.c138
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/ufs/truncate.c22
-rw-r--r--fs/ufs/ufs.h2
5 files changed, 121 insertions, 47 deletions
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7106eda5024..fb8b54eb77c5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
417 if (oldcount == 0) { 417 if (oldcount == 0) {
418 result = ufs_alloc_fragments (inode, cgno, goal, count, err); 418 result = ufs_alloc_fragments (inode, cgno, goal, count, err);
419 if (result) { 419 if (result) {
420 write_seqlock(&UFS_I(inode)->meta_lock);
420 ufs_cpu_to_data_ptr(sb, p, result); 421 ufs_cpu_to_data_ptr(sb, p, result);
422 write_sequnlock(&UFS_I(inode)->meta_lock);
421 *err = 0; 423 *err = 0;
422 UFS_I(inode)->i_lastfrag = 424 UFS_I(inode)->i_lastfrag =
423 max(UFS_I(inode)->i_lastfrag, fragment + count); 425 max(UFS_I(inode)->i_lastfrag, fragment + count);
@@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
473 ufs_change_blocknr(inode, fragment - oldcount, oldcount, 475 ufs_change_blocknr(inode, fragment - oldcount, oldcount,
474 uspi->s_sbbase + tmp, 476 uspi->s_sbbase + tmp,
475 uspi->s_sbbase + result, locked_page); 477 uspi->s_sbbase + result, locked_page);
478 write_seqlock(&UFS_I(inode)->meta_lock);
476 ufs_cpu_to_data_ptr(sb, p, result); 479 ufs_cpu_to_data_ptr(sb, p, result);
480 write_sequnlock(&UFS_I(inode)->meta_lock);
477 *err = 0; 481 *err = 0;
478 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, 482 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
479 fragment + count); 483 fragment + count);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a4fc3adfdc4c..100f93c6b309 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -41,8 +41,6 @@
41#include "swab.h" 41#include "swab.h"
42#include "util.h" 42#include "util.h"
43 43
44static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
45
46static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) 44static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
47{ 45{
48 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; 46 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
@@ -75,12 +73,53 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
75 return n; 73 return n;
76} 74}
77 75
76typedef struct {
77 void *p;
78 union {
79 __fs32 key32;
80 __fs64 key64;
81 };
82 struct buffer_head *bh;
83} Indirect;
84
85static inline int grow_chain32(struct ufs_inode_info *ufsi,
86 struct buffer_head *bh, __fs32 *v,
87 Indirect *from, Indirect *to)
88{
89 Indirect *p;
90 unsigned seq;
91 to->bh = bh;
92 do {
93 seq = read_seqbegin(&ufsi->meta_lock);
94 to->key32 = *(__fs32 *)(to->p = v);
95 for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++)
96 ;
97 } while (read_seqretry(&ufsi->meta_lock, seq));
98 return (p > to);
99}
100
101static inline int grow_chain64(struct ufs_inode_info *ufsi,
102 struct buffer_head *bh, __fs64 *v,
103 Indirect *from, Indirect *to)
104{
105 Indirect *p;
106 unsigned seq;
107 to->bh = bh;
108 do {
109 seq = read_seqbegin(&ufsi->meta_lock);
110 to->key64 = *(__fs64 *)(to->p = v);
111 for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++)
112 ;
113 } while (read_seqretry(&ufsi->meta_lock, seq));
114 return (p > to);
115}
116
78/* 117/*
79 * Returns the location of the fragment from 118 * Returns the location of the fragment from
80 * the beginning of the filesystem. 119 * the beginning of the filesystem.
81 */ 120 */
82 121
83static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) 122static u64 ufs_frag_map(struct inode *inode, sector_t frag)
84{ 123{
85 struct ufs_inode_info *ufsi = UFS_I(inode); 124 struct ufs_inode_info *ufsi = UFS_I(inode);
86 struct super_block *sb = inode->i_sb; 125 struct super_block *sb = inode->i_sb;
@@ -88,12 +127,10 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
88 u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift; 127 u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift;
89 int shift = uspi->s_apbshift-uspi->s_fpbshift; 128 int shift = uspi->s_apbshift-uspi->s_fpbshift;
90 sector_t offsets[4], *p; 129 sector_t offsets[4], *p;
130 Indirect chain[4], *q = chain;
91 int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets); 131 int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets);
92 u64 ret = 0L;
93 __fs32 block;
94 __fs64 u2_block = 0L;
95 unsigned flags = UFS_SB(sb)->s_flags; 132 unsigned flags = UFS_SB(sb)->s_flags;
96 u64 temp = 0L; 133 u64 res = 0;
97 134
98 UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); 135 UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth);
99 UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n", 136 UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
@@ -101,59 +138,73 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
101 (unsigned long long)mask); 138 (unsigned long long)mask);
102 139
103 if (depth == 0) 140 if (depth == 0)
104 return 0; 141 goto no_block;
105 142
143again:
106 p = offsets; 144 p = offsets;
107 145
108 if (needs_lock)
109 lock_ufs(sb);
110 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 146 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
111 goto ufs2; 147 goto ufs2;
112 148
113 block = ufsi->i_u1.i_data[*p++]; 149 if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q))
114 if (!block) 150 goto changed;
115 goto out; 151 if (!q->key32)
152 goto no_block;
116 while (--depth) { 153 while (--depth) {
154 __fs32 *ptr;
117 struct buffer_head *bh; 155 struct buffer_head *bh;
118 sector_t n = *p++; 156 sector_t n = *p++;
119 157
120 bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift)); 158 bh = sb_bread(sb, uspi->s_sbbase +
159 fs32_to_cpu(sb, q->key32) + (n>>shift));
121 if (!bh) 160 if (!bh)
122 goto out; 161 goto no_block;
123 block = ((__fs32 *) bh->b_data)[n & mask]; 162 ptr = (__fs32 *)bh->b_data + (n & mask);
124 brelse (bh); 163 if (!grow_chain32(ufsi, bh, ptr, chain, ++q))
125 if (!block) 164 goto changed;
126 goto out; 165 if (!q->key32)
166 goto no_block;
127 } 167 }
128 ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask)); 168 res = fs32_to_cpu(sb, q->key32);
129 goto out; 169 goto found;
130ufs2:
131 u2_block = ufsi->i_u1.u2_i_data[*p++];
132 if (!u2_block)
133 goto out;
134 170
171ufs2:
172 if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q))
173 goto changed;
174 if (!q->key64)
175 goto no_block;
135 176
136 while (--depth) { 177 while (--depth) {
178 __fs64 *ptr;
137 struct buffer_head *bh; 179 struct buffer_head *bh;
138 sector_t n = *p++; 180 sector_t n = *p++;
139 181
140 182 bh = sb_bread(sb, uspi->s_sbbase +
141 temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block); 183 fs64_to_cpu(sb, q->key64) + (n>>shift));
142 bh = sb_bread(sb, temp +(u64) (n>>shift));
143 if (!bh) 184 if (!bh)
144 goto out; 185 goto no_block;
145 u2_block = ((__fs64 *)bh->b_data)[n & mask]; 186 ptr = (__fs64 *)bh->b_data + (n & mask);
146 brelse(bh); 187 if (!grow_chain64(ufsi, bh, ptr, chain, ++q))
147 if (!u2_block) 188 goto changed;
148 goto out; 189 if (!q->key64)
190 goto no_block;
191 }
192 res = fs64_to_cpu(sb, q->key64);
193found:
194 res += uspi->s_sbbase + (frag & uspi->s_fpbmask);
195no_block:
196 while (q > chain) {
197 brelse(q->bh);
198 q--;
149 } 199 }
150 temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block); 200 return res;
151 ret = temp + (u64) (frag & uspi->s_fpbmask);
152 201
153out: 202changed:
154 if (needs_lock) 203 while (q > chain) {
155 unlock_ufs(sb); 204 brelse(q->bh);
156 return ret; 205 q--;
206 }
207 goto again;
157} 208}
158 209
159/** 210/**
@@ -421,10 +472,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
421 int ret, err, new; 472 int ret, err, new;
422 unsigned long ptr,phys; 473 unsigned long ptr,phys;
423 u64 phys64 = 0; 474 u64 phys64 = 0;
424 bool needs_lock = (sbi->mutex_owner != current);
425 475
426 if (!create) { 476 if (!create) {
427 phys64 = ufs_frag_map(inode, fragment, needs_lock); 477 phys64 = ufs_frag_map(inode, fragment);
428 UFSD("phys64 = %llu\n", (unsigned long long)phys64); 478 UFSD("phys64 = %llu\n", (unsigned long long)phys64);
429 if (phys64) 479 if (phys64)
430 map_bh(bh_result, sb, phys64); 480 map_bh(bh_result, sb, phys64);
@@ -438,8 +488,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
438 ret = 0; 488 ret = 0;
439 bh = NULL; 489 bh = NULL;
440 490
441 if (needs_lock) 491 mutex_lock(&UFS_I(inode)->truncate_mutex);
442 lock_ufs(sb);
443 492
444 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 493 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
445 if (fragment > 494 if (fragment >
@@ -501,8 +550,7 @@ out:
501 set_buffer_new(bh_result); 550 set_buffer_new(bh_result);
502 map_bh(bh_result, sb, phys); 551 map_bh(bh_result, sb, phys);
503abort: 552abort:
504 if (needs_lock) 553 mutex_unlock(&UFS_I(inode)->truncate_mutex);
505 unlock_ufs(sb);
506 554
507 return err; 555 return err;
508 556
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 250579a80d90..15cd3338340c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1429,6 +1429,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1429 return NULL; 1429 return NULL;
1430 1430
1431 ei->vfs_inode.i_version = 1; 1431 ei->vfs_inode.i_version = 1;
1432 seqlock_init(&ei->meta_lock);
1433 mutex_init(&ei->truncate_mutex);
1432 return &ei->vfs_inode; 1434 return &ei->vfs_inode;
1433} 1435}
1434 1436
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 9908a6045d7a..ad34b7f4b499 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -128,7 +128,9 @@ next1:
128 tmp = ufs_data_ptr_to_cpu(sb, p); 128 tmp = ufs_data_ptr_to_cpu(sb, p);
129 if (!tmp) 129 if (!tmp)
130 continue; 130 continue;
131 write_seqlock(&ufsi->meta_lock);
131 ufs_data_ptr_clear(uspi, p); 132 ufs_data_ptr_clear(uspi, p);
133 write_sequnlock(&ufsi->meta_lock);
132 134
133 if (free_count == 0) { 135 if (free_count == 0) {
134 frag_to_free = tmp; 136 frag_to_free = tmp;
@@ -157,7 +159,9 @@ next1:
157 if (!tmp ) 159 if (!tmp )
158 ufs_panic(sb, "ufs_truncate_direct", "internal error"); 160 ufs_panic(sb, "ufs_truncate_direct", "internal error");
159 frag4 = ufs_fragnum (frag4); 161 frag4 = ufs_fragnum (frag4);
162 write_seqlock(&ufsi->meta_lock);
160 ufs_data_ptr_clear(uspi, p); 163 ufs_data_ptr_clear(uspi, p);
164 write_sequnlock(&ufsi->meta_lock);
161 165
162 ufs_free_fragments (inode, tmp, frag4); 166 ufs_free_fragments (inode, tmp, frag4);
163 mark_inode_dirty(inode); 167 mark_inode_dirty(inode);
@@ -199,7 +203,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
199 return 1; 203 return 1;
200 } 204 }
201 if (!ind_ubh) { 205 if (!ind_ubh) {
206 write_seqlock(&UFS_I(inode)->meta_lock);
202 ufs_data_ptr_clear(uspi, p); 207 ufs_data_ptr_clear(uspi, p);
208 write_sequnlock(&UFS_I(inode)->meta_lock);
203 return 0; 209 return 0;
204 } 210 }
205 211
@@ -210,7 +216,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
210 if (!tmp) 216 if (!tmp)
211 continue; 217 continue;
212 218
219 write_seqlock(&UFS_I(inode)->meta_lock);
213 ufs_data_ptr_clear(uspi, ind); 220 ufs_data_ptr_clear(uspi, ind);
221 write_sequnlock(&UFS_I(inode)->meta_lock);
214 ubh_mark_buffer_dirty(ind_ubh); 222 ubh_mark_buffer_dirty(ind_ubh);
215 if (free_count == 0) { 223 if (free_count == 0) {
216 frag_to_free = tmp; 224 frag_to_free = tmp;
@@ -235,7 +243,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
235 break; 243 break;
236 if (i >= uspi->s_apb) { 244 if (i >= uspi->s_apb) {
237 tmp = ufs_data_ptr_to_cpu(sb, p); 245 tmp = ufs_data_ptr_to_cpu(sb, p);
246 write_seqlock(&UFS_I(inode)->meta_lock);
238 ufs_data_ptr_clear(uspi, p); 247 ufs_data_ptr_clear(uspi, p);
248 write_sequnlock(&UFS_I(inode)->meta_lock);
239 249
240 ubh_bforget(ind_ubh); 250 ubh_bforget(ind_ubh);
241 ufs_free_blocks (inode, tmp, uspi->s_fpb); 251 ufs_free_blocks (inode, tmp, uspi->s_fpb);
@@ -278,7 +288,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
278 return 1; 288 return 1;
279 } 289 }
280 if (!dind_bh) { 290 if (!dind_bh) {
291 write_seqlock(&UFS_I(inode)->meta_lock);
281 ufs_data_ptr_clear(uspi, p); 292 ufs_data_ptr_clear(uspi, p);
293 write_sequnlock(&UFS_I(inode)->meta_lock);
282 return 0; 294 return 0;
283 } 295 }
284 296
@@ -297,7 +309,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
297 break; 309 break;
298 if (i >= uspi->s_apb) { 310 if (i >= uspi->s_apb) {
299 tmp = ufs_data_ptr_to_cpu(sb, p); 311 tmp = ufs_data_ptr_to_cpu(sb, p);
312 write_seqlock(&UFS_I(inode)->meta_lock);
300 ufs_data_ptr_clear(uspi, p); 313 ufs_data_ptr_clear(uspi, p);
314 write_sequnlock(&UFS_I(inode)->meta_lock);
301 315
302 ubh_bforget(dind_bh); 316 ubh_bforget(dind_bh);
303 ufs_free_blocks(inode, tmp, uspi->s_fpb); 317 ufs_free_blocks(inode, tmp, uspi->s_fpb);
@@ -339,7 +353,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
339 return 1; 353 return 1;
340 } 354 }
341 if (!tind_bh) { 355 if (!tind_bh) {
356 write_seqlock(&ufsi->meta_lock);
342 ufs_data_ptr_clear(uspi, p); 357 ufs_data_ptr_clear(uspi, p);
358 write_sequnlock(&ufsi->meta_lock);
343 return 0; 359 return 0;
344 } 360 }
345 361
@@ -355,7 +371,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
355 break; 371 break;
356 if (i >= uspi->s_apb) { 372 if (i >= uspi->s_apb) {
357 tmp = ufs_data_ptr_to_cpu(sb, p); 373 tmp = ufs_data_ptr_to_cpu(sb, p);
374 write_seqlock(&ufsi->meta_lock);
358 ufs_data_ptr_clear(uspi, p); 375 ufs_data_ptr_clear(uspi, p);
376 write_sequnlock(&ufsi->meta_lock);
359 377
360 ubh_bforget(tind_bh); 378 ubh_bforget(tind_bh);
361 ufs_free_blocks(inode, tmp, uspi->s_fpb); 379 ufs_free_blocks(inode, tmp, uspi->s_fpb);
@@ -447,7 +465,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
447 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 465 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
448 int retry; 466 int retry;
449 467
450 lock_ufs(sb); 468 mutex_lock(&ufsi->truncate_mutex);
451 while (1) { 469 while (1) {
452 retry = ufs_trunc_direct(inode); 470 retry = ufs_trunc_direct(inode);
453 retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, 471 retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
@@ -465,7 +483,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
465 } 483 }
466 484
467 ufsi->i_lastfrag = DIRECT_FRAGMENT; 485 ufsi->i_lastfrag = DIRECT_FRAGMENT;
468 unlock_ufs(sb); 486 mutex_unlock(&ufsi->truncate_mutex);
469} 487}
470 488
471int ufs_truncate(struct inode *inode, loff_t size) 489int ufs_truncate(struct inode *inode, loff_t size)
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 43fcab381de1..ea28b73a8b74 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -46,6 +46,8 @@ struct ufs_inode_info {
46 __u32 i_oeftflag; 46 __u32 i_oeftflag;
47 __u16 i_osync; 47 __u16 i_osync;
48 __u64 i_lastfrag; 48 __u64 i_lastfrag;
49 seqlock_t meta_lock;
50 struct mutex truncate_mutex;
49 __u32 i_dir_start_lookup; 51 __u32 i_dir_start_lookup;
50 struct inode vfs_inode; 52 struct inode vfs_inode;
51}; 53};