diff options
Diffstat (limited to 'fs')
174 files changed, 7376 insertions, 6140 deletions
diff --git a/fs/Makefile b/fs/Makefile index 4fe6df3ec28f..39a824f44e7c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o | |||
53 | obj-y += quota/ | 53 | obj-y += quota/ |
54 | 54 | ||
55 | obj-$(CONFIG_PROC_FS) += proc/ | 55 | obj-$(CONFIG_PROC_FS) += proc/ |
56 | obj-$(CONFIG_SYSFS) += sysfs/ | 56 | obj-$(CONFIG_SYSFS) += sysfs/ kernfs/ |
57 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ | 57 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ |
58 | obj-y += devpts/ | 58 | obj-y += devpts/ |
59 | 59 | ||
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx) | |||
244 | int i; | 244 | int i; |
245 | 245 | ||
246 | for (i = 0; i < ctx->nr_pages; i++) { | 246 | for (i = 0; i < ctx->nr_pages; i++) { |
247 | struct page *page; | ||
247 | pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, | 248 | pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, |
248 | page_count(ctx->ring_pages[i])); | 249 | page_count(ctx->ring_pages[i])); |
249 | put_page(ctx->ring_pages[i]); | 250 | page = ctx->ring_pages[i]; |
251 | if (!page) | ||
252 | continue; | ||
253 | ctx->ring_pages[i] = NULL; | ||
254 | put_page(page); | ||
250 | } | 255 | } |
251 | 256 | ||
252 | put_aio_ring_file(ctx); | 257 | put_aio_ring_file(ctx); |
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, | |||
280 | unsigned long flags; | 285 | unsigned long flags; |
281 | int rc; | 286 | int rc; |
282 | 287 | ||
288 | rc = 0; | ||
289 | |||
290 | /* Make sure the old page hasn't already been changed */ | ||
291 | spin_lock(&mapping->private_lock); | ||
292 | ctx = mapping->private_data; | ||
293 | if (ctx) { | ||
294 | pgoff_t idx; | ||
295 | spin_lock_irqsave(&ctx->completion_lock, flags); | ||
296 | idx = old->index; | ||
297 | if (idx < (pgoff_t)ctx->nr_pages) { | ||
298 | if (ctx->ring_pages[idx] != old) | ||
299 | rc = -EAGAIN; | ||
300 | } else | ||
301 | rc = -EINVAL; | ||
302 | spin_unlock_irqrestore(&ctx->completion_lock, flags); | ||
303 | } else | ||
304 | rc = -EINVAL; | ||
305 | spin_unlock(&mapping->private_lock); | ||
306 | |||
307 | if (rc != 0) | ||
308 | return rc; | ||
309 | |||
283 | /* Writeback must be complete */ | 310 | /* Writeback must be complete */ |
284 | BUG_ON(PageWriteback(old)); | 311 | BUG_ON(PageWriteback(old)); |
285 | put_page(old); | 312 | get_page(new); |
286 | 313 | ||
287 | rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); | 314 | rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1); |
288 | if (rc != MIGRATEPAGE_SUCCESS) { | 315 | if (rc != MIGRATEPAGE_SUCCESS) { |
289 | get_page(old); | 316 | put_page(new); |
290 | return rc; | 317 | return rc; |
291 | } | 318 | } |
292 | 319 | ||
293 | get_page(new); | ||
294 | |||
295 | /* We can potentially race against kioctx teardown here. Use the | 320 | /* We can potentially race against kioctx teardown here. Use the |
296 | * address_space's private data lock to protect the mapping's | 321 | * address_space's private data lock to protect the mapping's |
297 | * private_data. | 322 | * private_data. |
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, | |||
303 | spin_lock_irqsave(&ctx->completion_lock, flags); | 328 | spin_lock_irqsave(&ctx->completion_lock, flags); |
304 | migrate_page_copy(new, old); | 329 | migrate_page_copy(new, old); |
305 | idx = old->index; | 330 | idx = old->index; |
306 | if (idx < (pgoff_t)ctx->nr_pages) | 331 | if (idx < (pgoff_t)ctx->nr_pages) { |
307 | ctx->ring_pages[idx] = new; | 332 | /* And only do the move if things haven't changed */ |
333 | if (ctx->ring_pages[idx] == old) | ||
334 | ctx->ring_pages[idx] = new; | ||
335 | else | ||
336 | rc = -EAGAIN; | ||
337 | } else | ||
338 | rc = -EINVAL; | ||
308 | spin_unlock_irqrestore(&ctx->completion_lock, flags); | 339 | spin_unlock_irqrestore(&ctx->completion_lock, flags); |
309 | } else | 340 | } else |
310 | rc = -EBUSY; | 341 | rc = -EBUSY; |
311 | spin_unlock(&mapping->private_lock); | 342 | spin_unlock(&mapping->private_lock); |
312 | 343 | ||
344 | if (rc == MIGRATEPAGE_SUCCESS) | ||
345 | put_page(old); | ||
346 | else | ||
347 | put_page(new); | ||
348 | |||
313 | return rc; | 349 | return rc; |
314 | } | 350 | } |
315 | #endif | 351 | #endif |
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
326 | struct aio_ring *ring; | 362 | struct aio_ring *ring; |
327 | unsigned nr_events = ctx->max_reqs; | 363 | unsigned nr_events = ctx->max_reqs; |
328 | struct mm_struct *mm = current->mm; | 364 | struct mm_struct *mm = current->mm; |
329 | unsigned long size, populate; | 365 | unsigned long size, unused; |
330 | int nr_pages; | 366 | int nr_pages; |
331 | int i; | 367 | int i; |
332 | struct file *file; | 368 | struct file *file; |
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
347 | return -EAGAIN; | 383 | return -EAGAIN; |
348 | } | 384 | } |
349 | 385 | ||
386 | ctx->aio_ring_file = file; | ||
387 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) | ||
388 | / sizeof(struct io_event); | ||
389 | |||
390 | ctx->ring_pages = ctx->internal_pages; | ||
391 | if (nr_pages > AIO_RING_PAGES) { | ||
392 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | ||
393 | GFP_KERNEL); | ||
394 | if (!ctx->ring_pages) { | ||
395 | put_aio_ring_file(ctx); | ||
396 | return -ENOMEM; | ||
397 | } | ||
398 | } | ||
399 | |||
350 | for (i = 0; i < nr_pages; i++) { | 400 | for (i = 0; i < nr_pages; i++) { |
351 | struct page *page; | 401 | struct page *page; |
352 | page = find_or_create_page(file->f_inode->i_mapping, | 402 | page = find_or_create_page(file->f_inode->i_mapping, |
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
358 | SetPageUptodate(page); | 408 | SetPageUptodate(page); |
359 | SetPageDirty(page); | 409 | SetPageDirty(page); |
360 | unlock_page(page); | 410 | unlock_page(page); |
411 | |||
412 | ctx->ring_pages[i] = page; | ||
361 | } | 413 | } |
362 | ctx->aio_ring_file = file; | 414 | ctx->nr_pages = i; |
363 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) | ||
364 | / sizeof(struct io_event); | ||
365 | 415 | ||
366 | ctx->ring_pages = ctx->internal_pages; | 416 | if (unlikely(i != nr_pages)) { |
367 | if (nr_pages > AIO_RING_PAGES) { | 417 | aio_free_ring(ctx); |
368 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | 418 | return -EAGAIN; |
369 | GFP_KERNEL); | ||
370 | if (!ctx->ring_pages) { | ||
371 | put_aio_ring_file(ctx); | ||
372 | return -ENOMEM; | ||
373 | } | ||
374 | } | 419 | } |
375 | 420 | ||
376 | ctx->mmap_size = nr_pages * PAGE_SIZE; | 421 | ctx->mmap_size = nr_pages * PAGE_SIZE; |
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
379 | down_write(&mm->mmap_sem); | 424 | down_write(&mm->mmap_sem); |
380 | ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, | 425 | ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, |
381 | PROT_READ | PROT_WRITE, | 426 | PROT_READ | PROT_WRITE, |
382 | MAP_SHARED | MAP_POPULATE, 0, &populate); | 427 | MAP_SHARED, 0, &unused); |
428 | up_write(&mm->mmap_sem); | ||
383 | if (IS_ERR((void *)ctx->mmap_base)) { | 429 | if (IS_ERR((void *)ctx->mmap_base)) { |
384 | up_write(&mm->mmap_sem); | ||
385 | ctx->mmap_size = 0; | 430 | ctx->mmap_size = 0; |
386 | aio_free_ring(ctx); | 431 | aio_free_ring(ctx); |
387 | return -EAGAIN; | 432 | return -EAGAIN; |
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
389 | 434 | ||
390 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); | 435 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); |
391 | 436 | ||
392 | /* We must do this while still holding mmap_sem for write, as we | ||
393 | * need to be protected against userspace attempting to mremap() | ||
394 | * or munmap() the ring buffer. | ||
395 | */ | ||
396 | ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, | ||
397 | 1, 0, ctx->ring_pages, NULL); | ||
398 | |||
399 | /* Dropping the reference here is safe as the page cache will hold | ||
400 | * onto the pages for us. It is also required so that page migration | ||
401 | * can unmap the pages and get the right reference count. | ||
402 | */ | ||
403 | for (i = 0; i < ctx->nr_pages; i++) | ||
404 | put_page(ctx->ring_pages[i]); | ||
405 | |||
406 | up_write(&mm->mmap_sem); | ||
407 | |||
408 | if (unlikely(ctx->nr_pages != nr_pages)) { | ||
409 | aio_free_ring(ctx); | ||
410 | return -EAGAIN; | ||
411 | } | ||
412 | |||
413 | ctx->user_id = ctx->mmap_base; | 437 | ctx->user_id = ctx->mmap_base; |
414 | ctx->nr_events = nr_events; /* trusted copy */ | 438 | ctx->nr_events = nr_events; /* trusted copy */ |
415 | 439 | ||
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
652 | aio_nr += ctx->max_reqs; | 676 | aio_nr += ctx->max_reqs; |
653 | spin_unlock(&aio_nr_lock); | 677 | spin_unlock(&aio_nr_lock); |
654 | 678 | ||
655 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ | 679 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ |
680 | percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */ | ||
656 | 681 | ||
657 | err = ioctx_add_table(ctx, mm); | 682 | err = ioctx_add_table(ctx, mm); |
658 | if (err) | 683 | if (err) |
@@ -202,11 +202,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de | |||
202 | return -EPERM; | 202 | return -EPERM; |
203 | } | 203 | } |
204 | 204 | ||
205 | if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { | ||
206 | if (attr->ia_size != inode->i_size) | ||
207 | inode_inc_iversion(inode); | ||
208 | } | ||
209 | |||
210 | if ((ia_valid & ATTR_MODE)) { | 205 | if ((ia_valid & ATTR_MODE)) { |
211 | umode_t amode = attr->ia_mode; | 206 | umode_t amode = attr->ia_mode; |
212 | /* Flag setting protected by i_mutex */ | 207 | /* Flag setting protected by i_mutex */ |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f1a77449d032..471a4f7f4044 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4354,8 +4354,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4354 | * these flags set. For all other operations the VFS set these flags | 4354 | * these flags set. For all other operations the VFS set these flags |
4355 | * explicitly if it wants a timestamp update. | 4355 | * explicitly if it wants a timestamp update. |
4356 | */ | 4356 | */ |
4357 | if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) | 4357 | if (newsize != oldsize) { |
4358 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | 4358 | inode_inc_iversion(inode); |
4359 | if (!(mask & (ATTR_CTIME | ATTR_MTIME))) | ||
4360 | inode->i_ctime = inode->i_mtime = | ||
4361 | current_fs_time(inode->i_sb); | ||
4362 | } | ||
4359 | 4363 | ||
4360 | if (newsize > oldsize) { | 4364 | if (newsize > oldsize) { |
4361 | truncate_pagecache(inode, newsize); | 4365 | truncate_pagecache(inode, newsize); |
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 6fc82010dc15..c8d9ddf84c69 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c | |||
@@ -101,7 +101,7 @@ static int test_extents(struct btrfs_block_group_cache *cache) | |||
101 | 101 | ||
102 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); | 102 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); |
103 | if (ret) { | 103 | if (ret) { |
104 | test_msg("Error removing middle peice %d\n", ret); | 104 | test_msg("Error removing middle piece %d\n", ret); |
105 | return ret; | 105 | return ret; |
106 | } | 106 | } |
107 | 107 | ||
@@ -266,7 +266,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
266 | } | 266 | } |
267 | 267 | ||
268 | if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { | 268 | if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { |
269 | test_msg("Left over peices after removing overlapping\n"); | 269 | test_msg("Left over pieces after removing overlapping\n"); |
270 | return -1; | 270 | return -1; |
271 | } | 271 | } |
272 | 272 | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e561c059539..ec3ba43b9faa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -210,9 +210,13 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
210 | if (err < 0) { | 210 | if (err < 0) { |
211 | SetPageError(page); | 211 | SetPageError(page); |
212 | goto out; | 212 | goto out; |
213 | } else if (err < PAGE_CACHE_SIZE) { | 213 | } else { |
214 | if (err < PAGE_CACHE_SIZE) { | ||
214 | /* zero fill remainder of page */ | 215 | /* zero fill remainder of page */ |
215 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | 216 | zero_user_segment(page, err, PAGE_CACHE_SIZE); |
217 | } else { | ||
218 | flush_dcache_page(page); | ||
219 | } | ||
216 | } | 220 | } |
217 | SetPageUptodate(page); | 221 | SetPageUptodate(page); |
218 | 222 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9a8e396aed89..278fd2891288 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -978,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
978 | struct ceph_mds_reply_inode *ininfo; | 978 | struct ceph_mds_reply_inode *ininfo; |
979 | struct ceph_vino vino; | 979 | struct ceph_vino vino; |
980 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | 980 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
981 | int i = 0; | ||
982 | int err = 0; | 981 | int err = 0; |
983 | 982 | ||
984 | dout("fill_trace %p is_dentry %d is_target %d\n", req, | 983 | dout("fill_trace %p is_dentry %d is_target %d\n", req, |
@@ -1039,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1039 | } | 1038 | } |
1040 | } | 1039 | } |
1041 | 1040 | ||
1041 | if (rinfo->head->is_target) { | ||
1042 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1043 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1044 | |||
1045 | in = ceph_get_inode(sb, vino); | ||
1046 | if (IS_ERR(in)) { | ||
1047 | err = PTR_ERR(in); | ||
1048 | goto done; | ||
1049 | } | ||
1050 | req->r_target_inode = in; | ||
1051 | |||
1052 | err = fill_inode(in, &rinfo->targeti, NULL, | ||
1053 | session, req->r_request_started, | ||
1054 | (le32_to_cpu(rinfo->head->result) == 0) ? | ||
1055 | req->r_fmode : -1, | ||
1056 | &req->r_caps_reservation); | ||
1057 | if (err < 0) { | ||
1058 | pr_err("fill_inode badness %p %llx.%llx\n", | ||
1059 | in, ceph_vinop(in)); | ||
1060 | goto done; | ||
1061 | } | ||
1062 | } | ||
1063 | |||
1042 | /* | 1064 | /* |
1043 | * ignore null lease/binding on snapdir ENOENT, or else we | 1065 | * ignore null lease/binding on snapdir ENOENT, or else we |
1044 | * will have trouble splicing in the virtual snapdir later | 1066 | * will have trouble splicing in the virtual snapdir later |
@@ -1108,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1108 | ceph_dentry(req->r_old_dentry)->offset); | 1130 | ceph_dentry(req->r_old_dentry)->offset); |
1109 | 1131 | ||
1110 | dn = req->r_old_dentry; /* use old_dentry */ | 1132 | dn = req->r_old_dentry; /* use old_dentry */ |
1111 | in = dn->d_inode; | ||
1112 | } | 1133 | } |
1113 | 1134 | ||
1114 | /* null dentry? */ | 1135 | /* null dentry? */ |
@@ -1130,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1130 | } | 1151 | } |
1131 | 1152 | ||
1132 | /* attach proper inode */ | 1153 | /* attach proper inode */ |
1133 | ininfo = rinfo->targeti.in; | 1154 | if (!dn->d_inode) { |
1134 | vino.ino = le64_to_cpu(ininfo->ino); | 1155 | ihold(in); |
1135 | vino.snap = le64_to_cpu(ininfo->snapid); | ||
1136 | in = dn->d_inode; | ||
1137 | if (!in) { | ||
1138 | in = ceph_get_inode(sb, vino); | ||
1139 | if (IS_ERR(in)) { | ||
1140 | pr_err("fill_trace bad get_inode " | ||
1141 | "%llx.%llx\n", vino.ino, vino.snap); | ||
1142 | err = PTR_ERR(in); | ||
1143 | d_drop(dn); | ||
1144 | goto done; | ||
1145 | } | ||
1146 | dn = splice_dentry(dn, in, &have_lease, true); | 1156 | dn = splice_dentry(dn, in, &have_lease, true); |
1147 | if (IS_ERR(dn)) { | 1157 | if (IS_ERR(dn)) { |
1148 | err = PTR_ERR(dn); | 1158 | err = PTR_ERR(dn); |
1149 | goto done; | 1159 | goto done; |
1150 | } | 1160 | } |
1151 | req->r_dentry = dn; /* may have spliced */ | 1161 | req->r_dentry = dn; /* may have spliced */ |
1152 | ihold(in); | 1162 | } else if (dn->d_inode && dn->d_inode != in) { |
1153 | } else if (ceph_ino(in) == vino.ino && | ||
1154 | ceph_snap(in) == vino.snap) { | ||
1155 | ihold(in); | ||
1156 | } else { | ||
1157 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", | 1163 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
1158 | dn, in, ceph_ino(in), ceph_snap(in), | 1164 | dn, dn->d_inode, ceph_vinop(dn->d_inode), |
1159 | vino.ino, vino.snap); | 1165 | ceph_vinop(in)); |
1160 | have_lease = false; | 1166 | have_lease = false; |
1161 | in = NULL; | ||
1162 | } | 1167 | } |
1163 | 1168 | ||
1164 | if (have_lease) | 1169 | if (have_lease) |
1165 | update_dentry_lease(dn, rinfo->dlease, session, | 1170 | update_dentry_lease(dn, rinfo->dlease, session, |
1166 | req->r_request_started); | 1171 | req->r_request_started); |
1167 | dout(" final dn %p\n", dn); | 1172 | dout(" final dn %p\n", dn); |
1168 | i++; | 1173 | } else if (!req->r_aborted && |
1169 | } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || | 1174 | (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || |
1170 | req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) { | 1175 | req->r_op == CEPH_MDS_OP_MKSNAP)) { |
1171 | struct dentry *dn = req->r_dentry; | 1176 | struct dentry *dn = req->r_dentry; |
1172 | 1177 | ||
1173 | /* fill out a snapdir LOOKUPSNAP dentry */ | 1178 | /* fill out a snapdir LOOKUPSNAP dentry */ |
@@ -1177,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1177 | ininfo = rinfo->targeti.in; | 1182 | ininfo = rinfo->targeti.in; |
1178 | vino.ino = le64_to_cpu(ininfo->ino); | 1183 | vino.ino = le64_to_cpu(ininfo->ino); |
1179 | vino.snap = le64_to_cpu(ininfo->snapid); | 1184 | vino.snap = le64_to_cpu(ininfo->snapid); |
1180 | in = ceph_get_inode(sb, vino); | ||
1181 | if (IS_ERR(in)) { | ||
1182 | pr_err("fill_inode get_inode badness %llx.%llx\n", | ||
1183 | vino.ino, vino.snap); | ||
1184 | err = PTR_ERR(in); | ||
1185 | d_delete(dn); | ||
1186 | goto done; | ||
1187 | } | ||
1188 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1185 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1186 | ihold(in); | ||
1189 | dn = splice_dentry(dn, in, NULL, true); | 1187 | dn = splice_dentry(dn, in, NULL, true); |
1190 | if (IS_ERR(dn)) { | 1188 | if (IS_ERR(dn)) { |
1191 | err = PTR_ERR(dn); | 1189 | err = PTR_ERR(dn); |
1192 | goto done; | 1190 | goto done; |
1193 | } | 1191 | } |
1194 | req->r_dentry = dn; /* may have spliced */ | 1192 | req->r_dentry = dn; /* may have spliced */ |
1195 | ihold(in); | ||
1196 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | ||
1197 | } | ||
1198 | |||
1199 | if (rinfo->head->is_target) { | ||
1200 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1201 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1202 | |||
1203 | if (in == NULL || ceph_ino(in) != vino.ino || | ||
1204 | ceph_snap(in) != vino.snap) { | ||
1205 | in = ceph_get_inode(sb, vino); | ||
1206 | if (IS_ERR(in)) { | ||
1207 | err = PTR_ERR(in); | ||
1208 | goto done; | ||
1209 | } | ||
1210 | } | ||
1211 | req->r_target_inode = in; | ||
1212 | |||
1213 | err = fill_inode(in, | ||
1214 | &rinfo->targeti, NULL, | ||
1215 | session, req->r_request_started, | ||
1216 | (le32_to_cpu(rinfo->head->result) == 0) ? | ||
1217 | req->r_fmode : -1, | ||
1218 | &req->r_caps_reservation); | ||
1219 | if (err < 0) { | ||
1220 | pr_err("fill_inode badness %p %llx.%llx\n", | ||
1221 | in, ceph_vinop(in)); | ||
1222 | goto done; | ||
1223 | } | ||
1224 | } | 1193 | } |
1225 | |||
1226 | done: | 1194 | done: |
1227 | dout("fill_trace done err=%d\n", err); | 1195 | dout("fill_trace done err=%d\n", err); |
1228 | return err; | 1196 | return err; |
@@ -1272,7 +1240,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
1272 | struct qstr dname; | 1240 | struct qstr dname; |
1273 | struct dentry *dn; | 1241 | struct dentry *dn; |
1274 | struct inode *in; | 1242 | struct inode *in; |
1275 | int err = 0, i; | 1243 | int err = 0, ret, i; |
1276 | struct inode *snapdir = NULL; | 1244 | struct inode *snapdir = NULL; |
1277 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; | 1245 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; |
1278 | struct ceph_dentry_info *di; | 1246 | struct ceph_dentry_info *di; |
@@ -1305,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
1305 | ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); | 1273 | ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); |
1306 | } | 1274 | } |
1307 | 1275 | ||
1276 | /* FIXME: release caps/leases if error occurs */ | ||
1308 | for (i = 0; i < rinfo->dir_nr; i++) { | 1277 | for (i = 0; i < rinfo->dir_nr; i++) { |
1309 | struct ceph_vino vino; | 1278 | struct ceph_vino vino; |
1310 | 1279 | ||
@@ -1329,9 +1298,10 @@ retry_lookup: | |||
1329 | err = -ENOMEM; | 1298 | err = -ENOMEM; |
1330 | goto out; | 1299 | goto out; |
1331 | } | 1300 | } |
1332 | err = ceph_init_dentry(dn); | 1301 | ret = ceph_init_dentry(dn); |
1333 | if (err < 0) { | 1302 | if (ret < 0) { |
1334 | dput(dn); | 1303 | dput(dn); |
1304 | err = ret; | ||
1335 | goto out; | 1305 | goto out; |
1336 | } | 1306 | } |
1337 | } else if (dn->d_inode && | 1307 | } else if (dn->d_inode && |
@@ -1351,9 +1321,6 @@ retry_lookup: | |||
1351 | spin_unlock(&parent->d_lock); | 1321 | spin_unlock(&parent->d_lock); |
1352 | } | 1322 | } |
1353 | 1323 | ||
1354 | di = dn->d_fsdata; | ||
1355 | di->offset = ceph_make_fpos(frag, i + r_readdir_offset); | ||
1356 | |||
1357 | /* inode */ | 1324 | /* inode */ |
1358 | if (dn->d_inode) { | 1325 | if (dn->d_inode) { |
1359 | in = dn->d_inode; | 1326 | in = dn->d_inode; |
@@ -1366,26 +1333,39 @@ retry_lookup: | |||
1366 | err = PTR_ERR(in); | 1333 | err = PTR_ERR(in); |
1367 | goto out; | 1334 | goto out; |
1368 | } | 1335 | } |
1369 | dn = splice_dentry(dn, in, NULL, false); | ||
1370 | if (IS_ERR(dn)) | ||
1371 | dn = NULL; | ||
1372 | } | 1336 | } |
1373 | 1337 | ||
1374 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, | 1338 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, |
1375 | req->r_request_started, -1, | 1339 | req->r_request_started, -1, |
1376 | &req->r_caps_reservation) < 0) { | 1340 | &req->r_caps_reservation) < 0) { |
1377 | pr_err("fill_inode badness on %p\n", in); | 1341 | pr_err("fill_inode badness on %p\n", in); |
1342 | if (!dn->d_inode) | ||
1343 | iput(in); | ||
1344 | d_drop(dn); | ||
1378 | goto next_item; | 1345 | goto next_item; |
1379 | } | 1346 | } |
1380 | if (dn) | 1347 | |
1381 | update_dentry_lease(dn, rinfo->dir_dlease[i], | 1348 | if (!dn->d_inode) { |
1382 | req->r_session, | 1349 | dn = splice_dentry(dn, in, NULL, false); |
1383 | req->r_request_started); | 1350 | if (IS_ERR(dn)) { |
1351 | err = PTR_ERR(dn); | ||
1352 | dn = NULL; | ||
1353 | goto next_item; | ||
1354 | } | ||
1355 | } | ||
1356 | |||
1357 | di = dn->d_fsdata; | ||
1358 | di->offset = ceph_make_fpos(frag, i + r_readdir_offset); | ||
1359 | |||
1360 | update_dentry_lease(dn, rinfo->dir_dlease[i], | ||
1361 | req->r_session, | ||
1362 | req->r_request_started); | ||
1384 | next_item: | 1363 | next_item: |
1385 | if (dn) | 1364 | if (dn) |
1386 | dput(dn); | 1365 | dput(dn); |
1387 | } | 1366 | } |
1388 | req->r_did_prepopulate = true; | 1367 | if (err == 0) |
1368 | req->r_did_prepopulate = true; | ||
1389 | 1369 | ||
1390 | out: | 1370 | out: |
1391 | if (snapdir) { | 1371 | if (snapdir) { |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index aa3397620342..2c29db6a247e 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, | |||
477 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); | 477 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); |
478 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); | 478 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); |
479 | extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); | 479 | extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); |
480 | extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 480 | extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, |
481 | const unsigned char *path, | 481 | struct cifs_sb_info *cifs_sb, |
482 | struct cifs_sb_info *cifs_sb, unsigned int xid); | 482 | struct cifs_fattr *fattr, |
483 | const unsigned char *path); | ||
483 | extern int mdfour(unsigned char *, unsigned char *, int); | 484 | extern int mdfour(unsigned char *, unsigned char *, int); |
484 | extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, | 485 | extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, |
485 | const struct nls_table *codepage); | 486 | const struct nls_table *codepage); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 124aa0230c1b..d707edb6b852 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -4010,7 +4010,7 @@ QFileInfoRetry: | |||
4010 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4010 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4011 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4011 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4012 | if (rc) { | 4012 | if (rc) { |
4013 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4013 | cifs_dbg(FYI, "Send error in QFileInfo = %d", rc); |
4014 | } else { /* decode response */ | 4014 | } else { /* decode response */ |
4015 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4015 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4016 | 4016 | ||
@@ -4179,7 +4179,7 @@ UnixQFileInfoRetry: | |||
4179 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4179 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4180 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4180 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4181 | if (rc) { | 4181 | if (rc) { |
4182 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4182 | cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc); |
4183 | } else { /* decode response */ | 4183 | } else { /* decode response */ |
4184 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4184 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4185 | 4185 | ||
@@ -4263,7 +4263,7 @@ UnixQPathInfoRetry: | |||
4263 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4263 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4264 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4264 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4265 | if (rc) { | 4265 | if (rc) { |
4266 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4266 | cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc); |
4267 | } else { /* decode response */ | 4267 | } else { /* decode response */ |
4268 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4268 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4269 | 4269 | ||
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11ff5f116b20..a514e0a65f69 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry) | |||
193 | static int | 193 | static int |
194 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | 194 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, |
195 | struct tcon_link *tlink, unsigned oflags, umode_t mode, | 195 | struct tcon_link *tlink, unsigned oflags, umode_t mode, |
196 | __u32 *oplock, struct cifs_fid *fid, int *created) | 196 | __u32 *oplock, struct cifs_fid *fid) |
197 | { | 197 | { |
198 | int rc = -ENOENT; | 198 | int rc = -ENOENT; |
199 | int create_options = CREATE_NOT_DIR; | 199 | int create_options = CREATE_NOT_DIR; |
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
349 | .device = 0, | 349 | .device = 0, |
350 | }; | 350 | }; |
351 | 351 | ||
352 | *created |= FILE_CREATED; | ||
353 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { | 352 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { |
354 | args.uid = current_fsuid(); | 353 | args.uid = current_fsuid(); |
355 | if (inode->i_mode & S_ISGID) | 354 | if (inode->i_mode & S_ISGID) |
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, | |||
480 | cifs_add_pending_open(&fid, tlink, &open); | 479 | cifs_add_pending_open(&fid, tlink, &open); |
481 | 480 | ||
482 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 481 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
483 | &oplock, &fid, opened); | 482 | &oplock, &fid); |
484 | 483 | ||
485 | if (rc) { | 484 | if (rc) { |
486 | cifs_del_pending_open(&open); | 485 | cifs_del_pending_open(&open); |
487 | goto out; | 486 | goto out; |
488 | } | 487 | } |
489 | 488 | ||
489 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) | ||
490 | *opened |= FILE_CREATED; | ||
491 | |||
490 | rc = finish_open(file, direntry, generic_file_open, opened); | 492 | rc = finish_open(file, direntry, generic_file_open, opened); |
491 | if (rc) { | 493 | if (rc) { |
492 | if (server->ops->close) | 494 | if (server->ops->close) |
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
529 | struct TCP_Server_Info *server; | 531 | struct TCP_Server_Info *server; |
530 | struct cifs_fid fid; | 532 | struct cifs_fid fid; |
531 | __u32 oplock; | 533 | __u32 oplock; |
532 | int created = FILE_CREATED; | ||
533 | 534 | ||
534 | cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", | 535 | cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", |
535 | inode, direntry->d_name.name, direntry); | 536 | inode, direntry->d_name.name, direntry); |
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
546 | server->ops->new_lease_key(&fid); | 547 | server->ops->new_lease_key(&fid); |
547 | 548 | ||
548 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 549 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
549 | &oplock, &fid, &created); | 550 | &oplock, &fid); |
550 | if (!rc && server->ops->close) | 551 | if (!rc && server->ops->close) |
551 | server->ops->close(xid, tcon, &fid); | 552 | server->ops->close(xid, tcon, &fid); |
552 | 553 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 36f9ebb93ceb..49719b8228e5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
383 | 383 | ||
384 | /* check for Minshall+French symlinks */ | 384 | /* check for Minshall+French symlinks */ |
385 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | 385 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { |
386 | int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | 386 | int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, |
387 | full_path); | ||
387 | if (tmprc) | 388 | if (tmprc) |
388 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); | 389 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); |
389 | } | 390 | } |
@@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
799 | 800 | ||
800 | /* check for Minshall+French symlinks */ | 801 | /* check for Minshall+French symlinks */ |
801 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | 802 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { |
802 | tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | 803 | tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, |
804 | full_path); | ||
803 | if (tmprc) | 805 | if (tmprc) |
804 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); | 806 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); |
805 | } | 807 | } |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cc0234710ddb..92aee08483a5 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, | |||
354 | 354 | ||
355 | 355 | ||
356 | int | 356 | int |
357 | CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 357 | CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, |
358 | const unsigned char *path, | 358 | struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, |
359 | struct cifs_sb_info *cifs_sb, unsigned int xid) | 359 | const unsigned char *path) |
360 | { | 360 | { |
361 | int rc = 0; | 361 | int rc; |
362 | u8 *buf = NULL; | 362 | u8 *buf = NULL; |
363 | unsigned int link_len = 0; | 363 | unsigned int link_len = 0; |
364 | unsigned int bytes_read = 0; | 364 | unsigned int bytes_read = 0; |
365 | struct cifs_tcon *ptcon; | ||
366 | 365 | ||
367 | if (!CIFSCouldBeMFSymlink(fattr)) | 366 | if (!CIFSCouldBeMFSymlink(fattr)) |
368 | /* it's not a symlink */ | 367 | /* it's not a symlink */ |
369 | return 0; | 368 | return 0; |
370 | 369 | ||
371 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | 370 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); |
372 | if (!buf) { | 371 | if (!buf) |
373 | rc = -ENOMEM; | 372 | return -ENOMEM; |
374 | goto out; | ||
375 | } | ||
376 | 373 | ||
377 | ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); | 374 | if (tcon->ses->server->ops->query_mf_symlink) |
378 | if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) | 375 | rc = tcon->ses->server->ops->query_mf_symlink(path, buf, |
379 | rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, | 376 | &bytes_read, cifs_sb, xid); |
380 | &bytes_read, cifs_sb, xid); | ||
381 | else | 377 | else |
382 | goto out; | 378 | rc = -ENOSYS; |
383 | 379 | ||
384 | if (rc != 0) | 380 | if (rc) |
385 | goto out; | 381 | goto out; |
386 | 382 | ||
387 | if (bytes_read == 0) /* not a symlink */ | 383 | if (bytes_read == 0) /* not a symlink */ |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dc52e13d58e0..3881610b6438 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -680,7 +680,8 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, | |||
680 | struct i2c_msg __user *tmsgs; | 680 | struct i2c_msg __user *tmsgs; |
681 | struct i2c_msg32 __user *umsgs; | 681 | struct i2c_msg32 __user *umsgs; |
682 | compat_caddr_t datap; | 682 | compat_caddr_t datap; |
683 | int nmsgs, i; | 683 | u32 nmsgs; |
684 | int i; | ||
684 | 685 | ||
685 | if (get_user(nmsgs, &udata->nmsgs)) | 686 | if (get_user(nmsgs, &udata->nmsgs)) |
686 | return -EFAULT; | 687 | return -EFAULT; |
diff --git a/fs/dcache.c b/fs/dcache.c index 6055d61811d3..cb4a10690868 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -3061,8 +3061,13 @@ char *d_path(const struct path *path, char *buf, int buflen) | |||
3061 | * thus don't need to be hashed. They also don't need a name until a | 3061 | * thus don't need to be hashed. They also don't need a name until a |
3062 | * user wants to identify the object in /proc/pid/fd/. The little hack | 3062 | * user wants to identify the object in /proc/pid/fd/. The little hack |
3063 | * below allows us to generate a name for these objects on demand: | 3063 | * below allows us to generate a name for these objects on demand: |
3064 | * | ||
3065 | * Some pseudo inodes are mountable. When they are mounted | ||
3066 | * path->dentry == path->mnt->mnt_root. In that case don't call d_dname | ||
3067 | * and instead have d_path return the mounted path. | ||
3064 | */ | 3068 | */ |
3065 | if (path->dentry->d_op && path->dentry->d_op->d_dname) | 3069 | if (path->dentry->d_op && path->dentry->d_op->d_dname && |
3070 | (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) | ||
3066 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | 3071 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); |
3067 | 3072 | ||
3068 | rcu_read_lock(); | 3073 | rcu_read_lock(); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d90909ec6aa6..a5e34dd6a32c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -649,6 +649,7 @@ static void process_sctp_notification(struct connection *con, | |||
649 | struct msghdr *msg, char *buf) | 649 | struct msghdr *msg, char *buf) |
650 | { | 650 | { |
651 | union sctp_notification *sn = (union sctp_notification *)buf; | 651 | union sctp_notification *sn = (union sctp_notification *)buf; |
652 | struct linger linger; | ||
652 | 653 | ||
653 | switch (sn->sn_header.sn_type) { | 654 | switch (sn->sn_header.sn_type) { |
654 | case SCTP_SEND_FAILED: | 655 | case SCTP_SEND_FAILED: |
@@ -727,6 +728,13 @@ static void process_sctp_notification(struct connection *con, | |||
727 | } | 728 | } |
728 | add_sock(new_con->sock, new_con); | 729 | add_sock(new_con->sock, new_con); |
729 | 730 | ||
731 | linger.l_onoff = 1; | ||
732 | linger.l_linger = 0; | ||
733 | ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER, | ||
734 | (char *)&linger, sizeof(linger)); | ||
735 | if (ret < 0) | ||
736 | log_print("set socket option SO_LINGER failed"); | ||
737 | |||
730 | log_print("connecting to %d sctp association %d", | 738 | log_print("connecting to %d sctp association %d", |
731 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 739 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
732 | 740 | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8b5e2584c840..af903128891c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1907,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1907 | } | 1907 | } |
1908 | } | 1908 | } |
1909 | } | 1909 | } |
1910 | if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) { | ||
1911 | tep = tf.file->private_data; | ||
1912 | mutex_lock_nested(&tep->mtx, 1); | ||
1913 | } | ||
1914 | 1910 | ||
1915 | /* | 1911 | /* |
1916 | * Try to lookup the file inside our RB tree, Since we grabbed "mtx" | 1912 | * Try to lookup the file inside our RB tree, Since we grabbed "mtx" |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..20d6697bd638 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, | |||
1493 | sb->s_blocksize - offset : towrite; | 1493 | sb->s_blocksize - offset : towrite; |
1494 | 1494 | ||
1495 | tmp_bh.b_state = 0; | 1495 | tmp_bh.b_state = 0; |
1496 | tmp_bh.b_size = sb->s_blocksize; | ||
1496 | err = ext2_get_block(inode, blk, &tmp_bh, 1); | 1497 | err = ext2_get_block(inode, blk, &tmp_bh, 1); |
1497 | if (err < 0) | 1498 | if (err < 0) |
1498 | goto out; | 1499 | goto out; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e6185031c1cc..ece55565b9cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -268,6 +268,16 @@ struct ext4_io_submit { | |||
268 | /* Translate # of blks to # of clusters */ | 268 | /* Translate # of blks to # of clusters */ |
269 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ | 269 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ |
270 | (sbi)->s_cluster_bits) | 270 | (sbi)->s_cluster_bits) |
271 | /* Mask out the low bits to get the starting block of the cluster */ | ||
272 | #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ | ||
273 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | ||
274 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ | ||
275 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | ||
276 | /* Get the cluster offset */ | ||
277 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ | ||
278 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | ||
279 | #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ | ||
280 | ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | ||
271 | 281 | ||
272 | /* | 282 | /* |
273 | * Structure of a blocks group descriptor | 283 | * Structure of a blocks group descriptor |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 17ac112ab101..3fe29de832c8 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
259 | if (WARN_ON_ONCE(err)) { | 259 | if (WARN_ON_ONCE(err)) { |
260 | ext4_journal_abort_handle(where, line, __func__, bh, | 260 | ext4_journal_abort_handle(where, line, __func__, bh, |
261 | handle, err); | 261 | handle, err); |
262 | ext4_error_inode(inode, where, line, | ||
263 | bh->b_blocknr, | ||
264 | "journal_dirty_metadata failed: " | ||
265 | "handle type %u started at line %u, " | ||
266 | "credits %u/%u, errcode %d", | ||
267 | handle->h_type, | ||
268 | handle->h_line_no, | ||
269 | handle->h_requested_credits, | ||
270 | handle->h_buffer_credits, err); | ||
262 | } | 271 | } |
263 | } else { | 272 | } else { |
264 | if (inode) | 273 | if (inode) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 35f65cf4f318..3384dc4bed40 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
360 | { | 360 | { |
361 | ext4_fsblk_t block = ext4_ext_pblock(ext); | 361 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
362 | int len = ext4_ext_get_actual_len(ext); | 362 | int len = ext4_ext_get_actual_len(ext); |
363 | ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); | ||
364 | ext4_lblk_t last = lblock + len - 1; | ||
363 | 365 | ||
364 | if (len == 0) | 366 | if (lblock > last) |
365 | return 0; | 367 | return 0; |
366 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
367 | } | 369 | } |
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
387 | if (depth == 0) { | 389 | if (depth == 0) { |
388 | /* leaf entries */ | 390 | /* leaf entries */ |
389 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); | 391 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); |
392 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
393 | ext4_fsblk_t pblock = 0; | ||
394 | ext4_lblk_t lblock = 0; | ||
395 | ext4_lblk_t prev = 0; | ||
396 | int len = 0; | ||
390 | while (entries) { | 397 | while (entries) { |
391 | if (!ext4_valid_extent(inode, ext)) | 398 | if (!ext4_valid_extent(inode, ext)) |
392 | return 0; | 399 | return 0; |
400 | |||
401 | /* Check for overlapping extents */ | ||
402 | lblock = le32_to_cpu(ext->ee_block); | ||
403 | len = ext4_ext_get_actual_len(ext); | ||
404 | if ((lblock <= prev) && prev) { | ||
405 | pblock = ext4_ext_pblock(ext); | ||
406 | es->s_last_error_block = cpu_to_le64(pblock); | ||
407 | return 0; | ||
408 | } | ||
393 | ext++; | 409 | ext++; |
394 | entries--; | 410 | entries--; |
411 | prev = lblock + len - 1; | ||
395 | } | 412 | } |
396 | } else { | 413 | } else { |
397 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); | 414 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); |
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, | |||
1834 | depth = ext_depth(inode); | 1851 | depth = ext_depth(inode); |
1835 | if (!path[depth].p_ext) | 1852 | if (!path[depth].p_ext) |
1836 | goto out; | 1853 | goto out; |
1837 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); | 1854 | b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); |
1838 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
1839 | 1855 | ||
1840 | /* | 1856 | /* |
1841 | * get the next allocated block if the extent in the path | 1857 | * get the next allocated block if the extent in the path |
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, | |||
1845 | b2 = ext4_ext_next_allocated_block(path); | 1861 | b2 = ext4_ext_next_allocated_block(path); |
1846 | if (b2 == EXT_MAX_BLOCKS) | 1862 | if (b2 == EXT_MAX_BLOCKS) |
1847 | goto out; | 1863 | goto out; |
1848 | b2 &= ~(sbi->s_cluster_ratio - 1); | 1864 | b2 = EXT4_LBLK_CMASK(sbi, b2); |
1849 | } | 1865 | } |
1850 | 1866 | ||
1851 | /* check for wrap through zero on extent logical start block*/ | 1867 | /* check for wrap through zero on extent logical start block*/ |
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2504 | * extent, we have to mark the cluster as used (store negative | 2520 | * extent, we have to mark the cluster as used (store negative |
2505 | * cluster number in partial_cluster). | 2521 | * cluster number in partial_cluster). |
2506 | */ | 2522 | */ |
2507 | unaligned = pblk & (sbi->s_cluster_ratio - 1); | 2523 | unaligned = EXT4_PBLK_COFF(sbi, pblk); |
2508 | if (unaligned && (ee_len == num) && | 2524 | if (unaligned && (ee_len == num) && |
2509 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | 2525 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) |
2510 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2526 | *partial_cluster = EXT4_B2C(sbi, pblk); |
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2598 | * accidentally freeing it later on | 2614 | * accidentally freeing it later on |
2599 | */ | 2615 | */ |
2600 | pblk = ext4_ext_pblock(ex); | 2616 | pblk = ext4_ext_pblock(ex); |
2601 | if (pblk & (sbi->s_cluster_ratio - 1)) | 2617 | if (EXT4_PBLK_COFF(sbi, pblk)) |
2602 | *partial_cluster = | 2618 | *partial_cluster = |
2603 | -((long long)EXT4_B2C(sbi, pblk)); | 2619 | -((long long)EXT4_B2C(sbi, pblk)); |
2604 | ex--; | 2620 | ex--; |
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) | |||
3753 | { | 3769 | { |
3754 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 3770 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
3755 | ext4_lblk_t lblk_start, lblk_end; | 3771 | ext4_lblk_t lblk_start, lblk_end; |
3756 | lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); | 3772 | lblk_start = EXT4_LBLK_CMASK(sbi, lblk); |
3757 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; | 3773 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; |
3758 | 3774 | ||
3759 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end); | 3775 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end); |
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3812 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); | 3828 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); |
3813 | 3829 | ||
3814 | /* Check towards left side */ | 3830 | /* Check towards left side */ |
3815 | c_offset = lblk_start & (sbi->s_cluster_ratio - 1); | 3831 | c_offset = EXT4_LBLK_COFF(sbi, lblk_start); |
3816 | if (c_offset) { | 3832 | if (c_offset) { |
3817 | lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); | 3833 | lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); |
3818 | lblk_to = lblk_from + c_offset - 1; | 3834 | lblk_to = lblk_from + c_offset - 1; |
3819 | 3835 | ||
3820 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) | 3836 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) |
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3822 | } | 3838 | } |
3823 | 3839 | ||
3824 | /* Now check towards right. */ | 3840 | /* Now check towards right. */ |
3825 | c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); | 3841 | c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); |
3826 | if (allocated_clusters && c_offset) { | 3842 | if (allocated_clusters && c_offset) { |
3827 | lblk_from = lblk_start + num_blks; | 3843 | lblk_from = lblk_start + num_blks; |
3828 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; | 3844 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; |
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
4030 | struct ext4_ext_path *path) | 4046 | struct ext4_ext_path *path) |
4031 | { | 4047 | { |
4032 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4048 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4033 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 4049 | ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4034 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | 4050 | ext4_lblk_t ex_cluster_start, ex_cluster_end; |
4035 | ext4_lblk_t rr_cluster_start; | 4051 | ext4_lblk_t rr_cluster_start; |
4036 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 4052 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
4048 | (rr_cluster_start == ex_cluster_start)) { | 4064 | (rr_cluster_start == ex_cluster_start)) { |
4049 | if (rr_cluster_start == ex_cluster_end) | 4065 | if (rr_cluster_start == ex_cluster_end) |
4050 | ee_start += ee_len - 1; | 4066 | ee_start += ee_len - 1; |
4051 | map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + | 4067 | map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; |
4052 | c_offset; | ||
4053 | map->m_len = min(map->m_len, | 4068 | map->m_len = min(map->m_len, |
4054 | (unsigned) sbi->s_cluster_ratio - c_offset); | 4069 | (unsigned) sbi->s_cluster_ratio - c_offset); |
4055 | /* | 4070 | /* |
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4203 | */ | 4218 | */ |
4204 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | 4219 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; |
4205 | newex.ee_block = cpu_to_le32(map->m_lblk); | 4220 | newex.ee_block = cpu_to_le32(map->m_lblk); |
4206 | cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 4221 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4207 | 4222 | ||
4208 | /* | 4223 | /* |
4209 | * If we are doing bigalloc, check to see if the extent returned | 4224 | * If we are doing bigalloc, check to see if the extent returned |
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4271 | * needed so that future calls to get_implied_cluster_alloc() | 4286 | * needed so that future calls to get_implied_cluster_alloc() |
4272 | * work correctly. | 4287 | * work correctly. |
4273 | */ | 4288 | */ |
4274 | offset = map->m_lblk & (sbi->s_cluster_ratio - 1); | 4289 | offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4275 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); | 4290 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); |
4276 | ar.goal -= offset; | 4291 | ar.goal -= offset; |
4277 | ar.logical -= offset; | 4292 | ar.logical -= offset; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 075763474118..31fa964742bc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file, | |||
1206 | */ | 1206 | */ |
1207 | static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | 1207 | static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) |
1208 | { | 1208 | { |
1209 | int retries = 0; | ||
1210 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1209 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1211 | struct ext4_inode_info *ei = EXT4_I(inode); | 1210 | struct ext4_inode_info *ei = EXT4_I(inode); |
1212 | unsigned int md_needed; | 1211 | unsigned int md_needed; |
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | |||
1218 | * in order to allocate nrblocks | 1217 | * in order to allocate nrblocks |
1219 | * worse case is one extent per block | 1218 | * worse case is one extent per block |
1220 | */ | 1219 | */ |
1221 | repeat: | ||
1222 | spin_lock(&ei->i_block_reservation_lock); | 1220 | spin_lock(&ei->i_block_reservation_lock); |
1223 | /* | 1221 | /* |
1224 | * ext4_calc_metadata_amount() has side effects, which we have | 1222 | * ext4_calc_metadata_amount() has side effects, which we have |
@@ -1238,10 +1236,6 @@ repeat: | |||
1238 | ei->i_da_metadata_calc_len = save_len; | 1236 | ei->i_da_metadata_calc_len = save_len; |
1239 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | 1237 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
1240 | spin_unlock(&ei->i_block_reservation_lock); | 1238 | spin_unlock(&ei->i_block_reservation_lock); |
1241 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1242 | cond_resched(); | ||
1243 | goto repeat; | ||
1244 | } | ||
1245 | return -ENOSPC; | 1239 | return -ENOSPC; |
1246 | } | 1240 | } |
1247 | ei->i_reserved_meta_blocks += md_needed; | 1241 | ei->i_reserved_meta_blocks += md_needed; |
@@ -1255,7 +1249,6 @@ repeat: | |||
1255 | */ | 1249 | */ |
1256 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1250 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1257 | { | 1251 | { |
1258 | int retries = 0; | ||
1259 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1252 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1260 | struct ext4_inode_info *ei = EXT4_I(inode); | 1253 | struct ext4_inode_info *ei = EXT4_I(inode); |
1261 | unsigned int md_needed; | 1254 | unsigned int md_needed; |
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
1277 | * in order to allocate nrblocks | 1270 | * in order to allocate nrblocks |
1278 | * worse case is one extent per block | 1271 | * worse case is one extent per block |
1279 | */ | 1272 | */ |
1280 | repeat: | ||
1281 | spin_lock(&ei->i_block_reservation_lock); | 1273 | spin_lock(&ei->i_block_reservation_lock); |
1282 | /* | 1274 | /* |
1283 | * ext4_calc_metadata_amount() has side effects, which we have | 1275 | * ext4_calc_metadata_amount() has side effects, which we have |
@@ -1297,10 +1289,6 @@ repeat: | |||
1297 | ei->i_da_metadata_calc_len = save_len; | 1289 | ei->i_da_metadata_calc_len = save_len; |
1298 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | 1290 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
1299 | spin_unlock(&ei->i_block_reservation_lock); | 1291 | spin_unlock(&ei->i_block_reservation_lock); |
1300 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1301 | cond_resched(); | ||
1302 | goto repeat; | ||
1303 | } | ||
1304 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | 1292 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1305 | return -ENOSPC; | 1293 | return -ENOSPC; |
1306 | } | 1294 | } |
@@ -4598,6 +4586,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4598 | if (attr->ia_size > sbi->s_bitmap_maxbytes) | 4586 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
4599 | return -EFBIG; | 4587 | return -EFBIG; |
4600 | } | 4588 | } |
4589 | |||
4590 | if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) | ||
4591 | inode_inc_iversion(inode); | ||
4592 | |||
4601 | if (S_ISREG(inode->i_mode) && | 4593 | if (S_ISREG(inode->i_mode) && |
4602 | (attr->ia_size < inode->i_size)) { | 4594 | (attr->ia_size < inode->i_size)) { |
4603 | if (ext4_should_order_data(inode)) { | 4595 | if (ext4_should_order_data(inode)) { |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4d113efa024c..04a5c7504be9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) | |||
3442 | { | 3442 | { |
3443 | struct ext4_prealloc_space *pa; | 3443 | struct ext4_prealloc_space *pa; |
3444 | pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); | 3444 | pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); |
3445 | |||
3446 | BUG_ON(atomic_read(&pa->pa_count)); | ||
3447 | BUG_ON(pa->pa_deleted == 0); | ||
3445 | kmem_cache_free(ext4_pspace_cachep, pa); | 3448 | kmem_cache_free(ext4_pspace_cachep, pa); |
3446 | } | 3449 | } |
3447 | 3450 | ||
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | |||
3455 | ext4_group_t grp; | 3458 | ext4_group_t grp; |
3456 | ext4_fsblk_t grp_blk; | 3459 | ext4_fsblk_t grp_blk; |
3457 | 3460 | ||
3458 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) | ||
3459 | return; | ||
3460 | |||
3461 | /* in this short window concurrent discard can set pa_deleted */ | 3461 | /* in this short window concurrent discard can set pa_deleted */ |
3462 | spin_lock(&pa->pa_lock); | 3462 | spin_lock(&pa->pa_lock); |
3463 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { | ||
3464 | spin_unlock(&pa->pa_lock); | ||
3465 | return; | ||
3466 | } | ||
3467 | |||
3463 | if (pa->pa_deleted == 1) { | 3468 | if (pa->pa_deleted == 1) { |
3464 | spin_unlock(&pa->pa_lock); | 3469 | spin_unlock(&pa->pa_lock); |
3465 | return; | 3470 | return; |
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4121 | ext4_get_group_no_and_offset(sb, goal, &group, &block); | 4126 | ext4_get_group_no_and_offset(sb, goal, &group, &block); |
4122 | 4127 | ||
4123 | /* set up allocation goals */ | 4128 | /* set up allocation goals */ |
4124 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); | 4129 | ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); |
4125 | ac->ac_status = AC_STATUS_CONTINUE; | 4130 | ac->ac_status = AC_STATUS_CONTINUE; |
4126 | ac->ac_sb = sb; | 4131 | ac->ac_sb = sb; |
4127 | ac->ac_inode = ar->inode; | 4132 | ac->ac_inode = ar->inode; |
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4663 | * blocks at the beginning or the end unless we are explicitly | 4668 | * blocks at the beginning or the end unless we are explicitly |
4664 | * requested to avoid doing so. | 4669 | * requested to avoid doing so. |
4665 | */ | 4670 | */ |
4666 | overflow = block & (sbi->s_cluster_ratio - 1); | 4671 | overflow = EXT4_PBLK_COFF(sbi, block); |
4667 | if (overflow) { | 4672 | if (overflow) { |
4668 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { | 4673 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
4669 | overflow = sbi->s_cluster_ratio - overflow; | 4674 | overflow = sbi->s_cluster_ratio - overflow; |
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4677 | count += overflow; | 4682 | count += overflow; |
4678 | } | 4683 | } |
4679 | } | 4684 | } |
4680 | overflow = count & (sbi->s_cluster_ratio - 1); | 4685 | overflow = EXT4_LBLK_COFF(sbi, count); |
4681 | if (overflow) { | 4686 | if (overflow) { |
4682 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { | 4687 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
4683 | if (count > overflow) | 4688 | if (count > overflow) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c977f4e4e63b..1f7784de05b6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb) | |||
792 | } | 792 | } |
793 | 793 | ||
794 | ext4_es_unregister_shrinker(sbi); | 794 | ext4_es_unregister_shrinker(sbi); |
795 | del_timer(&sbi->s_err_report); | 795 | del_timer_sync(&sbi->s_err_report); |
796 | ext4_release_system_zone(sb); | 796 | ext4_release_system_zone(sb); |
797 | ext4_mb_release(sb); | 797 | ext4_mb_release(sb); |
798 | ext4_ext_release(sb); | 798 | ext4_ext_release(sb); |
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3316 | } | 3316 | } |
3317 | 3317 | ||
3318 | 3318 | ||
3319 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) | 3319 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) |
3320 | { | 3320 | { |
3321 | ext4_fsblk_t resv_clusters; | 3321 | ext4_fsblk_t resv_clusters; |
3322 | 3322 | ||
3323 | /* | 3323 | /* |
3324 | * There's no need to reserve anything when we aren't using extents. | ||
3325 | * The space estimates are exact, there are no unwritten extents, | ||
3326 | * hole punching doesn't need new metadata... This is needed especially | ||
3327 | * to keep ext2/3 backward compatibility. | ||
3328 | */ | ||
3329 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) | ||
3330 | return 0; | ||
3331 | /* | ||
3324 | * By default we reserve 2% or 4096 clusters, whichever is smaller. | 3332 | * By default we reserve 2% or 4096 clusters, whichever is smaller. |
3325 | * This should cover the situations where we can not afford to run | 3333 | * This should cover the situations where we can not afford to run |
3326 | * out of space like for example punch hole, or converting | 3334 | * out of space like for example punch hole, or converting |
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) | |||
3328 | * allocation would require 1, or 2 blocks, higher numbers are | 3336 | * allocation would require 1, or 2 blocks, higher numbers are |
3329 | * very rare. | 3337 | * very rare. |
3330 | */ | 3338 | */ |
3331 | resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; | 3339 | resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> |
3340 | EXT4_SB(sb)->s_cluster_bits; | ||
3332 | 3341 | ||
3333 | do_div(resv_clusters, 50); | 3342 | do_div(resv_clusters, 50); |
3334 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); | 3343 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); |
@@ -4071,10 +4080,10 @@ no_journal: | |||
4071 | "available"); | 4080 | "available"); |
4072 | } | 4081 | } |
4073 | 4082 | ||
4074 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); | 4083 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); |
4075 | if (err) { | 4084 | if (err) { |
4076 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " | 4085 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " |
4077 | "reserved pool", ext4_calculate_resv_clusters(sbi)); | 4086 | "reserved pool", ext4_calculate_resv_clusters(sb)); |
4078 | goto failed_mount4a; | 4087 | goto failed_mount4a; |
4079 | } | 4088 | } |
4080 | 4089 | ||
@@ -4184,7 +4193,7 @@ failed_mount_wq: | |||
4184 | } | 4193 | } |
4185 | failed_mount3: | 4194 | failed_mount3: |
4186 | ext4_es_unregister_shrinker(sbi); | 4195 | ext4_es_unregister_shrinker(sbi); |
4187 | del_timer(&sbi->s_err_report); | 4196 | del_timer_sync(&sbi->s_err_report); |
4188 | if (sbi->s_flex_groups) | 4197 | if (sbi->s_flex_groups) |
4189 | ext4_kvfree(sbi->s_flex_groups); | 4198 | ext4_kvfree(sbi->s_flex_groups); |
4190 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 4199 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 27a0820340b9..2e35da12d292 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | obj-$(CONFIG_F2FS_FS) += f2fs.o | 1 | obj-$(CONFIG_F2FS_FS) += f2fs.o |
2 | 2 | ||
3 | f2fs-y := dir.o file.o inode.o namei.o hash.o super.o | 3 | f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o |
4 | f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o | 4 | f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o |
5 | f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o | 5 | f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o |
6 | f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o | 6 | f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5716e5eb4e8e..293d0486a40f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab; | |||
30 | */ | 30 | */ |
31 | struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | 31 | struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) |
32 | { | 32 | { |
33 | struct address_space *mapping = sbi->meta_inode->i_mapping; | 33 | struct address_space *mapping = META_MAPPING(sbi); |
34 | struct page *page = NULL; | 34 | struct page *page = NULL; |
35 | repeat: | 35 | repeat: |
36 | page = grab_cache_page(mapping, index); | 36 | page = grab_cache_page(mapping, index); |
@@ -50,7 +50,7 @@ repeat: | |||
50 | */ | 50 | */ |
51 | struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | 51 | struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) |
52 | { | 52 | { |
53 | struct address_space *mapping = sbi->meta_inode->i_mapping; | 53 | struct address_space *mapping = META_MAPPING(sbi); |
54 | struct page *page; | 54 | struct page *page; |
55 | repeat: | 55 | repeat: |
56 | page = grab_cache_page(mapping, index); | 56 | page = grab_cache_page(mapping, index); |
@@ -61,11 +61,12 @@ repeat: | |||
61 | if (PageUptodate(page)) | 61 | if (PageUptodate(page)) |
62 | goto out; | 62 | goto out; |
63 | 63 | ||
64 | if (f2fs_readpage(sbi, page, index, READ_SYNC)) | 64 | if (f2fs_submit_page_bio(sbi, page, index, |
65 | READ_SYNC | REQ_META | REQ_PRIO)) | ||
65 | goto repeat; | 66 | goto repeat; |
66 | 67 | ||
67 | lock_page(page); | 68 | lock_page(page); |
68 | if (page->mapping != mapping) { | 69 | if (unlikely(page->mapping != mapping)) { |
69 | f2fs_put_page(page, 1); | 70 | f2fs_put_page(page, 1); |
70 | goto repeat; | 71 | goto repeat; |
71 | } | 72 | } |
@@ -81,13 +82,12 @@ static int f2fs_write_meta_page(struct page *page, | |||
81 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 82 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
82 | 83 | ||
83 | /* Should not write any meta pages, if any IO error was occurred */ | 84 | /* Should not write any meta pages, if any IO error was occurred */ |
84 | if (wbc->for_reclaim || sbi->por_doing || | 85 | if (unlikely(sbi->por_doing || |
85 | is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { | 86 | is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) |
86 | dec_page_count(sbi, F2FS_DIRTY_META); | 87 | goto redirty_out; |
87 | wbc->pages_skipped++; | 88 | |
88 | set_page_dirty(page); | 89 | if (wbc->for_reclaim) |
89 | return AOP_WRITEPAGE_ACTIVATE; | 90 | goto redirty_out; |
90 | } | ||
91 | 91 | ||
92 | wait_on_page_writeback(page); | 92 | wait_on_page_writeback(page); |
93 | 93 | ||
@@ -95,24 +95,31 @@ static int f2fs_write_meta_page(struct page *page, | |||
95 | dec_page_count(sbi, F2FS_DIRTY_META); | 95 | dec_page_count(sbi, F2FS_DIRTY_META); |
96 | unlock_page(page); | 96 | unlock_page(page); |
97 | return 0; | 97 | return 0; |
98 | |||
99 | redirty_out: | ||
100 | dec_page_count(sbi, F2FS_DIRTY_META); | ||
101 | wbc->pages_skipped++; | ||
102 | set_page_dirty(page); | ||
103 | return AOP_WRITEPAGE_ACTIVATE; | ||
98 | } | 104 | } |
99 | 105 | ||
100 | static int f2fs_write_meta_pages(struct address_space *mapping, | 106 | static int f2fs_write_meta_pages(struct address_space *mapping, |
101 | struct writeback_control *wbc) | 107 | struct writeback_control *wbc) |
102 | { | 108 | { |
103 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 109 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); |
104 | struct block_device *bdev = sbi->sb->s_bdev; | 110 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); |
105 | long written; | 111 | long written; |
106 | 112 | ||
107 | if (wbc->for_kupdate) | 113 | if (wbc->for_kupdate) |
108 | return 0; | 114 | return 0; |
109 | 115 | ||
110 | if (get_pages(sbi, F2FS_DIRTY_META) == 0) | 116 | /* collect a number of dirty meta pages and write together */ |
117 | if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) | ||
111 | return 0; | 118 | return 0; |
112 | 119 | ||
113 | /* if mounting is failed, skip writing node pages */ | 120 | /* if mounting is failed, skip writing node pages */ |
114 | mutex_lock(&sbi->cp_mutex); | 121 | mutex_lock(&sbi->cp_mutex); |
115 | written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); | 122 | written = sync_meta_pages(sbi, META, nrpages); |
116 | mutex_unlock(&sbi->cp_mutex); | 123 | mutex_unlock(&sbi->cp_mutex); |
117 | wbc->nr_to_write -= written; | 124 | wbc->nr_to_write -= written; |
118 | return 0; | 125 | return 0; |
@@ -121,7 +128,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, | |||
121 | long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | 128 | long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, |
122 | long nr_to_write) | 129 | long nr_to_write) |
123 | { | 130 | { |
124 | struct address_space *mapping = sbi->meta_inode->i_mapping; | 131 | struct address_space *mapping = META_MAPPING(sbi); |
125 | pgoff_t index = 0, end = LONG_MAX; | 132 | pgoff_t index = 0, end = LONG_MAX; |
126 | struct pagevec pvec; | 133 | struct pagevec pvec; |
127 | long nwritten = 0; | 134 | long nwritten = 0; |
@@ -136,7 +143,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |||
136 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 143 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
137 | PAGECACHE_TAG_DIRTY, | 144 | PAGECACHE_TAG_DIRTY, |
138 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 145 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
139 | if (nr_pages == 0) | 146 | if (unlikely(nr_pages == 0)) |
140 | break; | 147 | break; |
141 | 148 | ||
142 | for (i = 0; i < nr_pages; i++) { | 149 | for (i = 0; i < nr_pages; i++) { |
@@ -149,7 +156,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |||
149 | unlock_page(page); | 156 | unlock_page(page); |
150 | break; | 157 | break; |
151 | } | 158 | } |
152 | if (nwritten++ >= nr_to_write) | 159 | nwritten++; |
160 | if (unlikely(nwritten >= nr_to_write)) | ||
153 | break; | 161 | break; |
154 | } | 162 | } |
155 | pagevec_release(&pvec); | 163 | pagevec_release(&pvec); |
@@ -157,7 +165,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |||
157 | } | 165 | } |
158 | 166 | ||
159 | if (nwritten) | 167 | if (nwritten) |
160 | f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); | 168 | f2fs_submit_merged_bio(sbi, type, WRITE); |
161 | 169 | ||
162 | return nwritten; | 170 | return nwritten; |
163 | } | 171 | } |
@@ -186,31 +194,24 @@ const struct address_space_operations f2fs_meta_aops = { | |||
186 | 194 | ||
187 | int acquire_orphan_inode(struct f2fs_sb_info *sbi) | 195 | int acquire_orphan_inode(struct f2fs_sb_info *sbi) |
188 | { | 196 | { |
189 | unsigned int max_orphans; | ||
190 | int err = 0; | 197 | int err = 0; |
191 | 198 | ||
192 | /* | 199 | spin_lock(&sbi->orphan_inode_lock); |
193 | * considering 512 blocks in a segment 5 blocks are needed for cp | 200 | if (unlikely(sbi->n_orphans >= sbi->max_orphans)) |
194 | * and log segment summaries. Remaining blocks are used to keep | ||
195 | * orphan entries with the limitation one reserved segment | ||
196 | * for cp pack we can have max 1020*507 orphan entries | ||
197 | */ | ||
198 | max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; | ||
199 | mutex_lock(&sbi->orphan_inode_mutex); | ||
200 | if (sbi->n_orphans >= max_orphans) | ||
201 | err = -ENOSPC; | 201 | err = -ENOSPC; |
202 | else | 202 | else |
203 | sbi->n_orphans++; | 203 | sbi->n_orphans++; |
204 | mutex_unlock(&sbi->orphan_inode_mutex); | 204 | spin_unlock(&sbi->orphan_inode_lock); |
205 | |||
205 | return err; | 206 | return err; |
206 | } | 207 | } |
207 | 208 | ||
208 | void release_orphan_inode(struct f2fs_sb_info *sbi) | 209 | void release_orphan_inode(struct f2fs_sb_info *sbi) |
209 | { | 210 | { |
210 | mutex_lock(&sbi->orphan_inode_mutex); | 211 | spin_lock(&sbi->orphan_inode_lock); |
211 | f2fs_bug_on(sbi->n_orphans == 0); | 212 | f2fs_bug_on(sbi->n_orphans == 0); |
212 | sbi->n_orphans--; | 213 | sbi->n_orphans--; |
213 | mutex_unlock(&sbi->orphan_inode_mutex); | 214 | spin_unlock(&sbi->orphan_inode_lock); |
214 | } | 215 | } |
215 | 216 | ||
216 | void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | 217 | void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) |
@@ -218,27 +219,30 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
218 | struct list_head *head, *this; | 219 | struct list_head *head, *this; |
219 | struct orphan_inode_entry *new = NULL, *orphan = NULL; | 220 | struct orphan_inode_entry *new = NULL, *orphan = NULL; |
220 | 221 | ||
221 | mutex_lock(&sbi->orphan_inode_mutex); | 222 | new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); |
223 | new->ino = ino; | ||
224 | |||
225 | spin_lock(&sbi->orphan_inode_lock); | ||
222 | head = &sbi->orphan_inode_list; | 226 | head = &sbi->orphan_inode_list; |
223 | list_for_each(this, head) { | 227 | list_for_each(this, head) { |
224 | orphan = list_entry(this, struct orphan_inode_entry, list); | 228 | orphan = list_entry(this, struct orphan_inode_entry, list); |
225 | if (orphan->ino == ino) | 229 | if (orphan->ino == ino) { |
226 | goto out; | 230 | spin_unlock(&sbi->orphan_inode_lock); |
231 | kmem_cache_free(orphan_entry_slab, new); | ||
232 | return; | ||
233 | } | ||
234 | |||
227 | if (orphan->ino > ino) | 235 | if (orphan->ino > ino) |
228 | break; | 236 | break; |
229 | orphan = NULL; | 237 | orphan = NULL; |
230 | } | 238 | } |
231 | 239 | ||
232 | new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); | ||
233 | new->ino = ino; | ||
234 | |||
235 | /* add new_oentry into list which is sorted by inode number */ | 240 | /* add new_oentry into list which is sorted by inode number */ |
236 | if (orphan) | 241 | if (orphan) |
237 | list_add(&new->list, this->prev); | 242 | list_add(&new->list, this->prev); |
238 | else | 243 | else |
239 | list_add_tail(&new->list, head); | 244 | list_add_tail(&new->list, head); |
240 | out: | 245 | spin_unlock(&sbi->orphan_inode_lock); |
241 | mutex_unlock(&sbi->orphan_inode_mutex); | ||
242 | } | 246 | } |
243 | 247 | ||
244 | void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | 248 | void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) |
@@ -246,7 +250,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
246 | struct list_head *head; | 250 | struct list_head *head; |
247 | struct orphan_inode_entry *orphan; | 251 | struct orphan_inode_entry *orphan; |
248 | 252 | ||
249 | mutex_lock(&sbi->orphan_inode_mutex); | 253 | spin_lock(&sbi->orphan_inode_lock); |
250 | head = &sbi->orphan_inode_list; | 254 | head = &sbi->orphan_inode_list; |
251 | list_for_each_entry(orphan, head, list) { | 255 | list_for_each_entry(orphan, head, list) { |
252 | if (orphan->ino == ino) { | 256 | if (orphan->ino == ino) { |
@@ -257,7 +261,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
257 | break; | 261 | break; |
258 | } | 262 | } |
259 | } | 263 | } |
260 | mutex_unlock(&sbi->orphan_inode_mutex); | 264 | spin_unlock(&sbi->orphan_inode_lock); |
261 | } | 265 | } |
262 | 266 | ||
263 | static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | 267 | static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) |
@@ -270,12 +274,12 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
270 | iput(inode); | 274 | iput(inode); |
271 | } | 275 | } |
272 | 276 | ||
273 | int recover_orphan_inodes(struct f2fs_sb_info *sbi) | 277 | void recover_orphan_inodes(struct f2fs_sb_info *sbi) |
274 | { | 278 | { |
275 | block_t start_blk, orphan_blkaddr, i, j; | 279 | block_t start_blk, orphan_blkaddr, i, j; |
276 | 280 | ||
277 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) | 281 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) |
278 | return 0; | 282 | return; |
279 | 283 | ||
280 | sbi->por_doing = true; | 284 | sbi->por_doing = true; |
281 | start_blk = __start_cp_addr(sbi) + 1; | 285 | start_blk = __start_cp_addr(sbi) + 1; |
@@ -295,29 +299,39 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) | |||
295 | /* clear Orphan Flag */ | 299 | /* clear Orphan Flag */ |
296 | clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); | 300 | clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); |
297 | sbi->por_doing = false; | 301 | sbi->por_doing = false; |
298 | return 0; | 302 | return; |
299 | } | 303 | } |
300 | 304 | ||
301 | static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) | 305 | static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) |
302 | { | 306 | { |
303 | struct list_head *head, *this, *next; | 307 | struct list_head *head; |
304 | struct f2fs_orphan_block *orphan_blk = NULL; | 308 | struct f2fs_orphan_block *orphan_blk = NULL; |
305 | struct page *page = NULL; | ||
306 | unsigned int nentries = 0; | 309 | unsigned int nentries = 0; |
307 | unsigned short index = 1; | 310 | unsigned short index; |
308 | unsigned short orphan_blocks; | 311 | unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + |
309 | |||
310 | orphan_blocks = (unsigned short)((sbi->n_orphans + | ||
311 | (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); | 312 | (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); |
313 | struct page *page = NULL; | ||
314 | struct orphan_inode_entry *orphan = NULL; | ||
315 | |||
316 | for (index = 0; index < orphan_blocks; index++) | ||
317 | grab_meta_page(sbi, start_blk + index); | ||
312 | 318 | ||
313 | mutex_lock(&sbi->orphan_inode_mutex); | 319 | index = 1; |
320 | spin_lock(&sbi->orphan_inode_lock); | ||
314 | head = &sbi->orphan_inode_list; | 321 | head = &sbi->orphan_inode_list; |
315 | 322 | ||
316 | /* loop for each orphan inode entry and write them in Jornal block */ | 323 | /* loop for each orphan inode entry and write them in Jornal block */ |
317 | list_for_each_safe(this, next, head) { | 324 | list_for_each_entry(orphan, head, list) { |
318 | struct orphan_inode_entry *orphan; | 325 | if (!page) { |
326 | page = find_get_page(META_MAPPING(sbi), start_blk++); | ||
327 | f2fs_bug_on(!page); | ||
328 | orphan_blk = | ||
329 | (struct f2fs_orphan_block *)page_address(page); | ||
330 | memset(orphan_blk, 0, sizeof(*orphan_blk)); | ||
331 | f2fs_put_page(page, 0); | ||
332 | } | ||
319 | 333 | ||
320 | orphan = list_entry(this, struct orphan_inode_entry, list); | 334 | orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); |
321 | 335 | ||
322 | if (nentries == F2FS_ORPHANS_PER_BLOCK) { | 336 | if (nentries == F2FS_ORPHANS_PER_BLOCK) { |
323 | /* | 337 | /* |
@@ -331,29 +345,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) | |||
331 | set_page_dirty(page); | 345 | set_page_dirty(page); |
332 | f2fs_put_page(page, 1); | 346 | f2fs_put_page(page, 1); |
333 | index++; | 347 | index++; |
334 | start_blk++; | ||
335 | nentries = 0; | 348 | nentries = 0; |
336 | page = NULL; | 349 | page = NULL; |
337 | } | 350 | } |
338 | if (page) | 351 | } |
339 | goto page_exist; | ||
340 | 352 | ||
341 | page = grab_meta_page(sbi, start_blk); | 353 | if (page) { |
342 | orphan_blk = (struct f2fs_orphan_block *)page_address(page); | 354 | orphan_blk->blk_addr = cpu_to_le16(index); |
343 | memset(orphan_blk, 0, sizeof(*orphan_blk)); | 355 | orphan_blk->blk_count = cpu_to_le16(orphan_blocks); |
344 | page_exist: | 356 | orphan_blk->entry_count = cpu_to_le32(nentries); |
345 | orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); | 357 | set_page_dirty(page); |
358 | f2fs_put_page(page, 1); | ||
346 | } | 359 | } |
347 | if (!page) | ||
348 | goto end; | ||
349 | 360 | ||
350 | orphan_blk->blk_addr = cpu_to_le16(index); | 361 | spin_unlock(&sbi->orphan_inode_lock); |
351 | orphan_blk->blk_count = cpu_to_le16(orphan_blocks); | ||
352 | orphan_blk->entry_count = cpu_to_le32(nentries); | ||
353 | set_page_dirty(page); | ||
354 | f2fs_put_page(page, 1); | ||
355 | end: | ||
356 | mutex_unlock(&sbi->orphan_inode_mutex); | ||
357 | } | 362 | } |
358 | 363 | ||
359 | static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | 364 | static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, |
@@ -428,7 +433,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) | |||
428 | cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); | 433 | cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); |
429 | 434 | ||
430 | /* The second checkpoint pack should start at the next segment */ | 435 | /* The second checkpoint pack should start at the next segment */ |
431 | cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); | 436 | cp_start_blk_no += ((unsigned long long)1) << |
437 | le32_to_cpu(fsb->log_blocks_per_seg); | ||
432 | cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); | 438 | cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); |
433 | 439 | ||
434 | if (cp1 && cp2) { | 440 | if (cp1 && cp2) { |
@@ -465,7 +471,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | |||
465 | list_for_each(this, head) { | 471 | list_for_each(this, head) { |
466 | struct dir_inode_entry *entry; | 472 | struct dir_inode_entry *entry; |
467 | entry = list_entry(this, struct dir_inode_entry, list); | 473 | entry = list_entry(this, struct dir_inode_entry, list); |
468 | if (entry->inode == inode) | 474 | if (unlikely(entry->inode == inode)) |
469 | return -EEXIST; | 475 | return -EEXIST; |
470 | } | 476 | } |
471 | list_add_tail(&new->list, head); | 477 | list_add_tail(&new->list, head); |
@@ -513,8 +519,8 @@ void add_dirty_dir_inode(struct inode *inode) | |||
513 | void remove_dirty_dir_inode(struct inode *inode) | 519 | void remove_dirty_dir_inode(struct inode *inode) |
514 | { | 520 | { |
515 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 521 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
516 | struct list_head *head = &sbi->dir_inode_list; | 522 | |
517 | struct list_head *this; | 523 | struct list_head *this, *head; |
518 | 524 | ||
519 | if (!S_ISDIR(inode->i_mode)) | 525 | if (!S_ISDIR(inode->i_mode)) |
520 | return; | 526 | return; |
@@ -525,6 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
525 | return; | 531 | return; |
526 | } | 532 | } |
527 | 533 | ||
534 | head = &sbi->dir_inode_list; | ||
528 | list_for_each(this, head) { | 535 | list_for_each(this, head) { |
529 | struct dir_inode_entry *entry; | 536 | struct dir_inode_entry *entry; |
530 | entry = list_entry(this, struct dir_inode_entry, list); | 537 | entry = list_entry(this, struct dir_inode_entry, list); |
@@ -546,11 +553,13 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
546 | 553 | ||
547 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | 554 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) |
548 | { | 555 | { |
549 | struct list_head *head = &sbi->dir_inode_list; | 556 | |
550 | struct list_head *this; | 557 | struct list_head *this, *head; |
551 | struct inode *inode = NULL; | 558 | struct inode *inode = NULL; |
552 | 559 | ||
553 | spin_lock(&sbi->dir_inode_lock); | 560 | spin_lock(&sbi->dir_inode_lock); |
561 | |||
562 | head = &sbi->dir_inode_list; | ||
554 | list_for_each(this, head) { | 563 | list_for_each(this, head) { |
555 | struct dir_inode_entry *entry; | 564 | struct dir_inode_entry *entry; |
556 | entry = list_entry(this, struct dir_inode_entry, list); | 565 | entry = list_entry(this, struct dir_inode_entry, list); |
@@ -565,11 +574,13 @@ struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
565 | 574 | ||
566 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) | 575 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) |
567 | { | 576 | { |
568 | struct list_head *head = &sbi->dir_inode_list; | 577 | struct list_head *head; |
569 | struct dir_inode_entry *entry; | 578 | struct dir_inode_entry *entry; |
570 | struct inode *inode; | 579 | struct inode *inode; |
571 | retry: | 580 | retry: |
572 | spin_lock(&sbi->dir_inode_lock); | 581 | spin_lock(&sbi->dir_inode_lock); |
582 | |||
583 | head = &sbi->dir_inode_list; | ||
573 | if (list_empty(head)) { | 584 | if (list_empty(head)) { |
574 | spin_unlock(&sbi->dir_inode_lock); | 585 | spin_unlock(&sbi->dir_inode_lock); |
575 | return; | 586 | return; |
@@ -585,7 +596,7 @@ retry: | |||
585 | * We should submit bio, since it exists several | 596 | * We should submit bio, since it exists several |
586 | * wribacking dentry pages in the freeing inode. | 597 | * wribacking dentry pages in the freeing inode. |
587 | */ | 598 | */ |
588 | f2fs_submit_bio(sbi, DATA, true); | 599 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
589 | } | 600 | } |
590 | goto retry; | 601 | goto retry; |
591 | } | 602 | } |
@@ -760,8 +771,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
760 | /* wait for previous submitted node/meta pages writeback */ | 771 | /* wait for previous submitted node/meta pages writeback */ |
761 | wait_on_all_pages_writeback(sbi); | 772 | wait_on_all_pages_writeback(sbi); |
762 | 773 | ||
763 | filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); | 774 | filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); |
764 | filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); | 775 | filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); |
765 | 776 | ||
766 | /* update user_block_counts */ | 777 | /* update user_block_counts */ |
767 | sbi->last_valid_block_count = sbi->total_valid_block_count; | 778 | sbi->last_valid_block_count = sbi->total_valid_block_count; |
@@ -770,7 +781,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
770 | /* Here, we only have one bio having CP pack */ | 781 | /* Here, we only have one bio having CP pack */ |
771 | sync_meta_pages(sbi, META_FLUSH, LONG_MAX); | 782 | sync_meta_pages(sbi, META_FLUSH, LONG_MAX); |
772 | 783 | ||
773 | if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { | 784 | if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { |
774 | clear_prefree_segments(sbi); | 785 | clear_prefree_segments(sbi); |
775 | F2FS_RESET_SB_DIRT(sbi); | 786 | F2FS_RESET_SB_DIRT(sbi); |
776 | } | 787 | } |
@@ -791,9 +802,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
791 | 802 | ||
792 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); | 803 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); |
793 | 804 | ||
794 | f2fs_submit_bio(sbi, DATA, true); | 805 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
795 | f2fs_submit_bio(sbi, NODE, true); | 806 | f2fs_submit_merged_bio(sbi, NODE, WRITE); |
796 | f2fs_submit_bio(sbi, META, true); | 807 | f2fs_submit_merged_bio(sbi, META, WRITE); |
797 | 808 | ||
798 | /* | 809 | /* |
799 | * update checkpoint pack index | 810 | * update checkpoint pack index |
@@ -818,20 +829,28 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
818 | 829 | ||
819 | void init_orphan_info(struct f2fs_sb_info *sbi) | 830 | void init_orphan_info(struct f2fs_sb_info *sbi) |
820 | { | 831 | { |
821 | mutex_init(&sbi->orphan_inode_mutex); | 832 | spin_lock_init(&sbi->orphan_inode_lock); |
822 | INIT_LIST_HEAD(&sbi->orphan_inode_list); | 833 | INIT_LIST_HEAD(&sbi->orphan_inode_list); |
823 | sbi->n_orphans = 0; | 834 | sbi->n_orphans = 0; |
835 | /* | ||
836 | * considering 512 blocks in a segment 8 blocks are needed for cp | ||
837 | * and log segment summaries. Remaining blocks are used to keep | ||
838 | * orphan entries with the limitation one reserved segment | ||
839 | * for cp pack we can have max 1020*504 orphan entries | ||
840 | */ | ||
841 | sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) | ||
842 | * F2FS_ORPHANS_PER_BLOCK; | ||
824 | } | 843 | } |
825 | 844 | ||
826 | int __init create_checkpoint_caches(void) | 845 | int __init create_checkpoint_caches(void) |
827 | { | 846 | { |
828 | orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", | 847 | orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", |
829 | sizeof(struct orphan_inode_entry), NULL); | 848 | sizeof(struct orphan_inode_entry), NULL); |
830 | if (unlikely(!orphan_entry_slab)) | 849 | if (!orphan_entry_slab) |
831 | return -ENOMEM; | 850 | return -ENOMEM; |
832 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", | 851 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", |
833 | sizeof(struct dir_inode_entry), NULL); | 852 | sizeof(struct dir_inode_entry), NULL); |
834 | if (unlikely(!inode_entry_slab)) { | 853 | if (!inode_entry_slab) { |
835 | kmem_cache_destroy(orphan_entry_slab); | 854 | kmem_cache_destroy(orphan_entry_slab); |
836 | return -ENOMEM; | 855 | return -ENOMEM; |
837 | } | 856 | } |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa3438c571fa..0ae558723506 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -24,6 +24,195 @@ | |||
24 | #include "segment.h" | 24 | #include "segment.h" |
25 | #include <trace/events/f2fs.h> | 25 | #include <trace/events/f2fs.h> |
26 | 26 | ||
27 | static void f2fs_read_end_io(struct bio *bio, int err) | ||
28 | { | ||
29 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
30 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
31 | |||
32 | do { | ||
33 | struct page *page = bvec->bv_page; | ||
34 | |||
35 | if (--bvec >= bio->bi_io_vec) | ||
36 | prefetchw(&bvec->bv_page->flags); | ||
37 | |||
38 | if (unlikely(!uptodate)) { | ||
39 | ClearPageUptodate(page); | ||
40 | SetPageError(page); | ||
41 | } else { | ||
42 | SetPageUptodate(page); | ||
43 | } | ||
44 | unlock_page(page); | ||
45 | } while (bvec >= bio->bi_io_vec); | ||
46 | |||
47 | bio_put(bio); | ||
48 | } | ||
49 | |||
50 | static void f2fs_write_end_io(struct bio *bio, int err) | ||
51 | { | ||
52 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
53 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
54 | struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); | ||
55 | |||
56 | do { | ||
57 | struct page *page = bvec->bv_page; | ||
58 | |||
59 | if (--bvec >= bio->bi_io_vec) | ||
60 | prefetchw(&bvec->bv_page->flags); | ||
61 | |||
62 | if (unlikely(!uptodate)) { | ||
63 | SetPageError(page); | ||
64 | set_bit(AS_EIO, &page->mapping->flags); | ||
65 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | ||
66 | sbi->sb->s_flags |= MS_RDONLY; | ||
67 | } | ||
68 | end_page_writeback(page); | ||
69 | dec_page_count(sbi, F2FS_WRITEBACK); | ||
70 | } while (bvec >= bio->bi_io_vec); | ||
71 | |||
72 | if (bio->bi_private) | ||
73 | complete(bio->bi_private); | ||
74 | |||
75 | if (!get_pages(sbi, F2FS_WRITEBACK) && | ||
76 | !list_empty(&sbi->cp_wait.task_list)) | ||
77 | wake_up(&sbi->cp_wait); | ||
78 | |||
79 | bio_put(bio); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Low-level block read/write IO operations. | ||
84 | */ | ||
85 | static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, | ||
86 | int npages, bool is_read) | ||
87 | { | ||
88 | struct bio *bio; | ||
89 | |||
90 | /* No failure on bio allocation */ | ||
91 | bio = bio_alloc(GFP_NOIO, npages); | ||
92 | |||
93 | bio->bi_bdev = sbi->sb->s_bdev; | ||
94 | bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | ||
95 | bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; | ||
96 | |||
97 | return bio; | ||
98 | } | ||
99 | |||
100 | static void __submit_merged_bio(struct f2fs_bio_info *io) | ||
101 | { | ||
102 | struct f2fs_io_info *fio = &io->fio; | ||
103 | int rw; | ||
104 | |||
105 | if (!io->bio) | ||
106 | return; | ||
107 | |||
108 | rw = fio->rw; | ||
109 | |||
110 | if (is_read_io(rw)) { | ||
111 | trace_f2fs_submit_read_bio(io->sbi->sb, rw, | ||
112 | fio->type, io->bio); | ||
113 | submit_bio(rw, io->bio); | ||
114 | } else { | ||
115 | trace_f2fs_submit_write_bio(io->sbi->sb, rw, | ||
116 | fio->type, io->bio); | ||
117 | /* | ||
118 | * META_FLUSH is only from the checkpoint procedure, and we | ||
119 | * should wait this metadata bio for FS consistency. | ||
120 | */ | ||
121 | if (fio->type == META_FLUSH) { | ||
122 | DECLARE_COMPLETION_ONSTACK(wait); | ||
123 | io->bio->bi_private = &wait; | ||
124 | submit_bio(rw, io->bio); | ||
125 | wait_for_completion(&wait); | ||
126 | } else { | ||
127 | submit_bio(rw, io->bio); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | io->bio = NULL; | ||
132 | } | ||
133 | |||
134 | void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, | ||
135 | enum page_type type, int rw) | ||
136 | { | ||
137 | enum page_type btype = PAGE_TYPE_OF_BIO(type); | ||
138 | struct f2fs_bio_info *io; | ||
139 | |||
140 | io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; | ||
141 | |||
142 | mutex_lock(&io->io_mutex); | ||
143 | |||
144 | /* change META to META_FLUSH in the checkpoint procedure */ | ||
145 | if (type >= META_FLUSH) { | ||
146 | io->fio.type = META_FLUSH; | ||
147 | io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; | ||
148 | } | ||
149 | __submit_merged_bio(io); | ||
150 | mutex_unlock(&io->io_mutex); | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Fill the locked page with data located in the block address. | ||
155 | * Return unlocked page. | ||
156 | */ | ||
157 | int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, | ||
158 | block_t blk_addr, int rw) | ||
159 | { | ||
160 | struct bio *bio; | ||
161 | |||
162 | trace_f2fs_submit_page_bio(page, blk_addr, rw); | ||
163 | |||
164 | /* Allocate a new bio */ | ||
165 | bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); | ||
166 | |||
167 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | ||
168 | bio_put(bio); | ||
169 | f2fs_put_page(page, 1); | ||
170 | return -EFAULT; | ||
171 | } | ||
172 | |||
173 | submit_bio(rw, bio); | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, | ||
178 | block_t blk_addr, struct f2fs_io_info *fio) | ||
179 | { | ||
180 | enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); | ||
181 | struct f2fs_bio_info *io; | ||
182 | bool is_read = is_read_io(fio->rw); | ||
183 | |||
184 | io = is_read ? &sbi->read_io : &sbi->write_io[btype]; | ||
185 | |||
186 | verify_block_addr(sbi, blk_addr); | ||
187 | |||
188 | mutex_lock(&io->io_mutex); | ||
189 | |||
190 | if (!is_read) | ||
191 | inc_page_count(sbi, F2FS_WRITEBACK); | ||
192 | |||
193 | if (io->bio && (io->last_block_in_bio != blk_addr - 1 || | ||
194 | io->fio.rw != fio->rw)) | ||
195 | __submit_merged_bio(io); | ||
196 | alloc_new: | ||
197 | if (io->bio == NULL) { | ||
198 | int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | ||
199 | |||
200 | io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); | ||
201 | io->fio = *fio; | ||
202 | } | ||
203 | |||
204 | if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < | ||
205 | PAGE_CACHE_SIZE) { | ||
206 | __submit_merged_bio(io); | ||
207 | goto alloc_new; | ||
208 | } | ||
209 | |||
210 | io->last_block_in_bio = blk_addr; | ||
211 | |||
212 | mutex_unlock(&io->io_mutex); | ||
213 | trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); | ||
214 | } | ||
215 | |||
27 | /* | 216 | /* |
28 | * Lock ordering for the change of data block address: | 217 | * Lock ordering for the change of data block address: |
29 | * ->data_page | 218 | * ->data_page |
@@ -37,7 +226,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) | |||
37 | struct page *node_page = dn->node_page; | 226 | struct page *node_page = dn->node_page; |
38 | unsigned int ofs_in_node = dn->ofs_in_node; | 227 | unsigned int ofs_in_node = dn->ofs_in_node; |
39 | 228 | ||
40 | f2fs_wait_on_page_writeback(node_page, NODE, false); | 229 | f2fs_wait_on_page_writeback(node_page, NODE); |
41 | 230 | ||
42 | rn = F2FS_NODE(node_page); | 231 | rn = F2FS_NODE(node_page); |
43 | 232 | ||
@@ -51,19 +240,39 @@ int reserve_new_block(struct dnode_of_data *dn) | |||
51 | { | 240 | { |
52 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 241 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
53 | 242 | ||
54 | if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) | 243 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
55 | return -EPERM; | 244 | return -EPERM; |
56 | if (!inc_valid_block_count(sbi, dn->inode, 1)) | 245 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) |
57 | return -ENOSPC; | 246 | return -ENOSPC; |
58 | 247 | ||
59 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); | 248 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); |
60 | 249 | ||
61 | __set_data_blkaddr(dn, NEW_ADDR); | 250 | __set_data_blkaddr(dn, NEW_ADDR); |
62 | dn->data_blkaddr = NEW_ADDR; | 251 | dn->data_blkaddr = NEW_ADDR; |
252 | mark_inode_dirty(dn->inode); | ||
63 | sync_inode_page(dn); | 253 | sync_inode_page(dn); |
64 | return 0; | 254 | return 0; |
65 | } | 255 | } |
66 | 256 | ||
257 | int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) | ||
258 | { | ||
259 | bool need_put = dn->inode_page ? false : true; | ||
260 | int err; | ||
261 | |||
262 | /* if inode_page exists, index should be zero */ | ||
263 | f2fs_bug_on(!need_put && index); | ||
264 | |||
265 | err = get_dnode_of_data(dn, index, ALLOC_NODE); | ||
266 | if (err) | ||
267 | return err; | ||
268 | |||
269 | if (dn->data_blkaddr == NULL_ADDR) | ||
270 | err = reserve_new_block(dn); | ||
271 | if (err || need_put) | ||
272 | f2fs_put_dnode(dn); | ||
273 | return err; | ||
274 | } | ||
275 | |||
67 | static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | 276 | static int check_extent_cache(struct inode *inode, pgoff_t pgofs, |
68 | struct buffer_head *bh_result) | 277 | struct buffer_head *bh_result) |
69 | { | 278 | { |
@@ -71,6 +280,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
71 | pgoff_t start_fofs, end_fofs; | 280 | pgoff_t start_fofs, end_fofs; |
72 | block_t start_blkaddr; | 281 | block_t start_blkaddr; |
73 | 282 | ||
283 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | ||
284 | return 0; | ||
285 | |||
74 | read_lock(&fi->ext.ext_lock); | 286 | read_lock(&fi->ext.ext_lock); |
75 | if (fi->ext.len == 0) { | 287 | if (fi->ext.len == 0) { |
76 | read_unlock(&fi->ext.ext_lock); | 288 | read_unlock(&fi->ext.ext_lock); |
@@ -109,6 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
109 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | 321 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); |
110 | pgoff_t fofs, start_fofs, end_fofs; | 322 | pgoff_t fofs, start_fofs, end_fofs; |
111 | block_t start_blkaddr, end_blkaddr; | 323 | block_t start_blkaddr, end_blkaddr; |
324 | int need_update = true; | ||
112 | 325 | ||
113 | f2fs_bug_on(blk_addr == NEW_ADDR); | 326 | f2fs_bug_on(blk_addr == NEW_ADDR); |
114 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | 327 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + |
@@ -117,6 +330,9 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
117 | /* Update the page address in the parent node */ | 330 | /* Update the page address in the parent node */ |
118 | __set_data_blkaddr(dn, blk_addr); | 331 | __set_data_blkaddr(dn, blk_addr); |
119 | 332 | ||
333 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | ||
334 | return; | ||
335 | |||
120 | write_lock(&fi->ext.ext_lock); | 336 | write_lock(&fi->ext.ext_lock); |
121 | 337 | ||
122 | start_fofs = fi->ext.fofs; | 338 | start_fofs = fi->ext.fofs; |
@@ -163,14 +379,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
163 | fofs - start_fofs + 1; | 379 | fofs - start_fofs + 1; |
164 | fi->ext.len -= fofs - start_fofs + 1; | 380 | fi->ext.len -= fofs - start_fofs + 1; |
165 | } | 381 | } |
166 | goto end_update; | 382 | } else { |
383 | need_update = false; | ||
167 | } | 384 | } |
168 | write_unlock(&fi->ext.ext_lock); | ||
169 | return; | ||
170 | 385 | ||
386 | /* Finally, if the extent is very fragmented, let's drop the cache. */ | ||
387 | if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { | ||
388 | fi->ext.len = 0; | ||
389 | set_inode_flag(fi, FI_NO_EXTENT); | ||
390 | need_update = true; | ||
391 | } | ||
171 | end_update: | 392 | end_update: |
172 | write_unlock(&fi->ext.ext_lock); | 393 | write_unlock(&fi->ext.ext_lock); |
173 | sync_inode_page(dn); | 394 | if (need_update) |
395 | sync_inode_page(dn); | ||
396 | return; | ||
174 | } | 397 | } |
175 | 398 | ||
176 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | 399 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) |
@@ -196,7 +419,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
196 | return ERR_PTR(-ENOENT); | 419 | return ERR_PTR(-ENOENT); |
197 | 420 | ||
198 | /* By fallocate(), there is no cached page, but with NEW_ADDR */ | 421 | /* By fallocate(), there is no cached page, but with NEW_ADDR */ |
199 | if (dn.data_blkaddr == NEW_ADDR) | 422 | if (unlikely(dn.data_blkaddr == NEW_ADDR)) |
200 | return ERR_PTR(-EINVAL); | 423 | return ERR_PTR(-EINVAL); |
201 | 424 | ||
202 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); | 425 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); |
@@ -208,11 +431,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
208 | return page; | 431 | return page; |
209 | } | 432 | } |
210 | 433 | ||
211 | err = f2fs_readpage(sbi, page, dn.data_blkaddr, | 434 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, |
212 | sync ? READ_SYNC : READA); | 435 | sync ? READ_SYNC : READA); |
436 | if (err) | ||
437 | return ERR_PTR(err); | ||
438 | |||
213 | if (sync) { | 439 | if (sync) { |
214 | wait_on_page_locked(page); | 440 | wait_on_page_locked(page); |
215 | if (!PageUptodate(page)) { | 441 | if (unlikely(!PageUptodate(page))) { |
216 | f2fs_put_page(page, 0); | 442 | f2fs_put_page(page, 0); |
217 | return ERR_PTR(-EIO); | 443 | return ERR_PTR(-EIO); |
218 | } | 444 | } |
@@ -246,7 +472,7 @@ repeat: | |||
246 | } | 472 | } |
247 | f2fs_put_dnode(&dn); | 473 | f2fs_put_dnode(&dn); |
248 | 474 | ||
249 | if (dn.data_blkaddr == NULL_ADDR) { | 475 | if (unlikely(dn.data_blkaddr == NULL_ADDR)) { |
250 | f2fs_put_page(page, 1); | 476 | f2fs_put_page(page, 1); |
251 | return ERR_PTR(-ENOENT); | 477 | return ERR_PTR(-ENOENT); |
252 | } | 478 | } |
@@ -266,16 +492,16 @@ repeat: | |||
266 | return page; | 492 | return page; |
267 | } | 493 | } |
268 | 494 | ||
269 | err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | 495 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); |
270 | if (err) | 496 | if (err) |
271 | return ERR_PTR(err); | 497 | return ERR_PTR(err); |
272 | 498 | ||
273 | lock_page(page); | 499 | lock_page(page); |
274 | if (!PageUptodate(page)) { | 500 | if (unlikely(!PageUptodate(page))) { |
275 | f2fs_put_page(page, 1); | 501 | f2fs_put_page(page, 1); |
276 | return ERR_PTR(-EIO); | 502 | return ERR_PTR(-EIO); |
277 | } | 503 | } |
278 | if (page->mapping != mapping) { | 504 | if (unlikely(page->mapping != mapping)) { |
279 | f2fs_put_page(page, 1); | 505 | f2fs_put_page(page, 1); |
280 | goto repeat; | 506 | goto repeat; |
281 | } | 507 | } |
@@ -286,12 +512,12 @@ repeat: | |||
286 | * Caller ensures that this data page is never allocated. | 512 | * Caller ensures that this data page is never allocated. |
287 | * A new zero-filled data page is allocated in the page cache. | 513 | * A new zero-filled data page is allocated in the page cache. |
288 | * | 514 | * |
289 | * Also, caller should grab and release a mutex by calling mutex_lock_op() and | 515 | * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and |
290 | * mutex_unlock_op(). | 516 | * f2fs_unlock_op(). |
291 | * Note that, npage is set only by make_empty_dir. | 517 | * Note that, ipage is set only by make_empty_dir. |
292 | */ | 518 | */ |
293 | struct page *get_new_data_page(struct inode *inode, | 519 | struct page *get_new_data_page(struct inode *inode, |
294 | struct page *npage, pgoff_t index, bool new_i_size) | 520 | struct page *ipage, pgoff_t index, bool new_i_size) |
295 | { | 521 | { |
296 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 522 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
297 | struct address_space *mapping = inode->i_mapping; | 523 | struct address_space *mapping = inode->i_mapping; |
@@ -299,24 +525,16 @@ struct page *get_new_data_page(struct inode *inode, | |||
299 | struct dnode_of_data dn; | 525 | struct dnode_of_data dn; |
300 | int err; | 526 | int err; |
301 | 527 | ||
302 | set_new_dnode(&dn, inode, npage, npage, 0); | 528 | set_new_dnode(&dn, inode, ipage, NULL, 0); |
303 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); | 529 | err = f2fs_reserve_block(&dn, index); |
304 | if (err) | 530 | if (err) |
305 | return ERR_PTR(err); | 531 | return ERR_PTR(err); |
306 | |||
307 | if (dn.data_blkaddr == NULL_ADDR) { | ||
308 | if (reserve_new_block(&dn)) { | ||
309 | if (!npage) | ||
310 | f2fs_put_dnode(&dn); | ||
311 | return ERR_PTR(-ENOSPC); | ||
312 | } | ||
313 | } | ||
314 | if (!npage) | ||
315 | f2fs_put_dnode(&dn); | ||
316 | repeat: | 532 | repeat: |
317 | page = grab_cache_page(mapping, index); | 533 | page = grab_cache_page(mapping, index); |
318 | if (!page) | 534 | if (!page) { |
319 | return ERR_PTR(-ENOMEM); | 535 | err = -ENOMEM; |
536 | goto put_err; | ||
537 | } | ||
320 | 538 | ||
321 | if (PageUptodate(page)) | 539 | if (PageUptodate(page)) |
322 | return page; | 540 | return page; |
@@ -325,15 +543,18 @@ repeat: | |||
325 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 543 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
326 | SetPageUptodate(page); | 544 | SetPageUptodate(page); |
327 | } else { | 545 | } else { |
328 | err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | 546 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, |
547 | READ_SYNC); | ||
329 | if (err) | 548 | if (err) |
330 | return ERR_PTR(err); | 549 | goto put_err; |
550 | |||
331 | lock_page(page); | 551 | lock_page(page); |
332 | if (!PageUptodate(page)) { | 552 | if (unlikely(!PageUptodate(page))) { |
333 | f2fs_put_page(page, 1); | 553 | f2fs_put_page(page, 1); |
334 | return ERR_PTR(-EIO); | 554 | err = -EIO; |
555 | goto put_err; | ||
335 | } | 556 | } |
336 | if (page->mapping != mapping) { | 557 | if (unlikely(page->mapping != mapping)) { |
337 | f2fs_put_page(page, 1); | 558 | f2fs_put_page(page, 1); |
338 | goto repeat; | 559 | goto repeat; |
339 | } | 560 | } |
@@ -344,140 +565,187 @@ repeat: | |||
344 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); | 565 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); |
345 | /* Only the directory inode sets new_i_size */ | 566 | /* Only the directory inode sets new_i_size */ |
346 | set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); | 567 | set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); |
347 | mark_inode_dirty_sync(inode); | ||
348 | } | 568 | } |
349 | return page; | 569 | return page; |
350 | } | ||
351 | |||
352 | static void read_end_io(struct bio *bio, int err) | ||
353 | { | ||
354 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
355 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
356 | 570 | ||
357 | do { | 571 | put_err: |
358 | struct page *page = bvec->bv_page; | 572 | f2fs_put_dnode(&dn); |
359 | 573 | return ERR_PTR(err); | |
360 | if (--bvec >= bio->bi_io_vec) | ||
361 | prefetchw(&bvec->bv_page->flags); | ||
362 | |||
363 | if (uptodate) { | ||
364 | SetPageUptodate(page); | ||
365 | } else { | ||
366 | ClearPageUptodate(page); | ||
367 | SetPageError(page); | ||
368 | } | ||
369 | unlock_page(page); | ||
370 | } while (bvec >= bio->bi_io_vec); | ||
371 | bio_put(bio); | ||
372 | } | 574 | } |
373 | 575 | ||
374 | /* | 576 | static int __allocate_data_block(struct dnode_of_data *dn) |
375 | * Fill the locked page with data located in the block address. | ||
376 | * Return unlocked page. | ||
377 | */ | ||
378 | int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, | ||
379 | block_t blk_addr, int type) | ||
380 | { | 577 | { |
381 | struct block_device *bdev = sbi->sb->s_bdev; | 578 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
382 | struct bio *bio; | 579 | struct f2fs_summary sum; |
580 | block_t new_blkaddr; | ||
581 | struct node_info ni; | ||
582 | int type; | ||
383 | 583 | ||
384 | trace_f2fs_readpage(page, blk_addr, type); | 584 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
585 | return -EPERM; | ||
586 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) | ||
587 | return -ENOSPC; | ||
385 | 588 | ||
386 | down_read(&sbi->bio_sem); | 589 | __set_data_blkaddr(dn, NEW_ADDR); |
590 | dn->data_blkaddr = NEW_ADDR; | ||
387 | 591 | ||
388 | /* Allocate a new bio */ | 592 | get_node_info(sbi, dn->nid, &ni); |
389 | bio = f2fs_bio_alloc(bdev, 1); | 593 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
390 | 594 | ||
391 | /* Initialize the bio */ | 595 | type = CURSEG_WARM_DATA; |
392 | bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | ||
393 | bio->bi_end_io = read_end_io; | ||
394 | 596 | ||
395 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | 597 | allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); |
396 | bio_put(bio); | ||
397 | up_read(&sbi->bio_sem); | ||
398 | f2fs_put_page(page, 1); | ||
399 | return -EFAULT; | ||
400 | } | ||
401 | 598 | ||
402 | submit_bio(type, bio); | 599 | /* direct IO doesn't use extent cache to maximize the performance */ |
403 | up_read(&sbi->bio_sem); | 600 | set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); |
601 | update_extent_cache(new_blkaddr, dn); | ||
602 | clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | ||
603 | |||
604 | dn->data_blkaddr = new_blkaddr; | ||
404 | return 0; | 605 | return 0; |
405 | } | 606 | } |
406 | 607 | ||
407 | /* | 608 | /* |
408 | * This function should be used by the data read flow only where it | 609 | * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. |
409 | * does not check the "create" flag that indicates block allocation. | 610 | * If original data blocks are allocated, then give them to blockdev. |
410 | * The reason for this special functionality is to exploit VFS readahead | 611 | * Otherwise, |
411 | * mechanism. | 612 | * a. preallocate requested block addresses |
613 | * b. do not use extent cache for better performance | ||
614 | * c. give the block addresses to blockdev | ||
412 | */ | 615 | */ |
413 | static int get_data_block_ro(struct inode *inode, sector_t iblock, | 616 | static int get_data_block(struct inode *inode, sector_t iblock, |
414 | struct buffer_head *bh_result, int create) | 617 | struct buffer_head *bh_result, int create) |
415 | { | 618 | { |
619 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
416 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; | 620 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; |
417 | unsigned maxblocks = bh_result->b_size >> blkbits; | 621 | unsigned maxblocks = bh_result->b_size >> blkbits; |
418 | struct dnode_of_data dn; | 622 | struct dnode_of_data dn; |
419 | pgoff_t pgofs; | 623 | int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; |
420 | int err; | 624 | pgoff_t pgofs, end_offset; |
625 | int err = 0, ofs = 1; | ||
626 | bool allocated = false; | ||
421 | 627 | ||
422 | /* Get the page offset from the block offset(iblock) */ | 628 | /* Get the page offset from the block offset(iblock) */ |
423 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); | 629 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); |
424 | 630 | ||
425 | if (check_extent_cache(inode, pgofs, bh_result)) { | 631 | if (check_extent_cache(inode, pgofs, bh_result)) |
426 | trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | 632 | goto out; |
427 | return 0; | 633 | |
428 | } | 634 | if (create) |
635 | f2fs_lock_op(sbi); | ||
429 | 636 | ||
430 | /* When reading holes, we need its node page */ | 637 | /* When reading holes, we need its node page */ |
431 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 638 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
432 | err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); | 639 | err = get_dnode_of_data(&dn, pgofs, mode); |
433 | if (err) { | 640 | if (err) { |
434 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); | 641 | if (err == -ENOENT) |
435 | return (err == -ENOENT) ? 0 : err; | 642 | err = 0; |
643 | goto unlock_out; | ||
644 | } | ||
645 | if (dn.data_blkaddr == NEW_ADDR) | ||
646 | goto put_out; | ||
647 | |||
648 | if (dn.data_blkaddr != NULL_ADDR) { | ||
649 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | ||
650 | } else if (create) { | ||
651 | err = __allocate_data_block(&dn); | ||
652 | if (err) | ||
653 | goto put_out; | ||
654 | allocated = true; | ||
655 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | ||
656 | } else { | ||
657 | goto put_out; | ||
436 | } | 658 | } |
437 | 659 | ||
438 | /* It does not support data allocation */ | 660 | end_offset = IS_INODE(dn.node_page) ? |
439 | f2fs_bug_on(create); | 661 | ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; |
662 | bh_result->b_size = (((size_t)1) << blkbits); | ||
663 | dn.ofs_in_node++; | ||
664 | pgofs++; | ||
665 | |||
666 | get_next: | ||
667 | if (dn.ofs_in_node >= end_offset) { | ||
668 | if (allocated) | ||
669 | sync_inode_page(&dn); | ||
670 | allocated = false; | ||
671 | f2fs_put_dnode(&dn); | ||
440 | 672 | ||
441 | if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { | 673 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
442 | int i; | 674 | err = get_dnode_of_data(&dn, pgofs, mode); |
443 | unsigned int end_offset; | 675 | if (err) { |
676 | if (err == -ENOENT) | ||
677 | err = 0; | ||
678 | goto unlock_out; | ||
679 | } | ||
680 | if (dn.data_blkaddr == NEW_ADDR) | ||
681 | goto put_out; | ||
444 | 682 | ||
445 | end_offset = IS_INODE(dn.node_page) ? | 683 | end_offset = IS_INODE(dn.node_page) ? |
446 | ADDRS_PER_INODE(F2FS_I(inode)) : | 684 | ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; |
447 | ADDRS_PER_BLOCK; | 685 | } |
448 | |||
449 | clear_buffer_new(bh_result); | ||
450 | 686 | ||
687 | if (maxblocks > (bh_result->b_size >> blkbits)) { | ||
688 | block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); | ||
689 | if (blkaddr == NULL_ADDR && create) { | ||
690 | err = __allocate_data_block(&dn); | ||
691 | if (err) | ||
692 | goto sync_out; | ||
693 | allocated = true; | ||
694 | blkaddr = dn.data_blkaddr; | ||
695 | } | ||
451 | /* Give more consecutive addresses for the read ahead */ | 696 | /* Give more consecutive addresses for the read ahead */ |
452 | for (i = 0; i < end_offset - dn.ofs_in_node; i++) | 697 | if (blkaddr == (bh_result->b_blocknr + ofs)) { |
453 | if (((datablock_addr(dn.node_page, | 698 | ofs++; |
454 | dn.ofs_in_node + i)) | 699 | dn.ofs_in_node++; |
455 | != (dn.data_blkaddr + i)) || maxblocks == i) | 700 | pgofs++; |
456 | break; | 701 | bh_result->b_size += (((size_t)1) << blkbits); |
457 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | 702 | goto get_next; |
458 | bh_result->b_size = (i << blkbits); | 703 | } |
459 | } | 704 | } |
705 | sync_out: | ||
706 | if (allocated) | ||
707 | sync_inode_page(&dn); | ||
708 | put_out: | ||
460 | f2fs_put_dnode(&dn); | 709 | f2fs_put_dnode(&dn); |
461 | trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | 710 | unlock_out: |
462 | return 0; | 711 | if (create) |
712 | f2fs_unlock_op(sbi); | ||
713 | out: | ||
714 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); | ||
715 | return err; | ||
463 | } | 716 | } |
464 | 717 | ||
465 | static int f2fs_read_data_page(struct file *file, struct page *page) | 718 | static int f2fs_read_data_page(struct file *file, struct page *page) |
466 | { | 719 | { |
467 | return mpage_readpage(page, get_data_block_ro); | 720 | struct inode *inode = page->mapping->host; |
721 | int ret; | ||
722 | |||
723 | /* If the file has inline data, try to read it directlly */ | ||
724 | if (f2fs_has_inline_data(inode)) | ||
725 | ret = f2fs_read_inline_data(inode, page); | ||
726 | else | ||
727 | ret = mpage_readpage(page, get_data_block); | ||
728 | |||
729 | return ret; | ||
468 | } | 730 | } |
469 | 731 | ||
470 | static int f2fs_read_data_pages(struct file *file, | 732 | static int f2fs_read_data_pages(struct file *file, |
471 | struct address_space *mapping, | 733 | struct address_space *mapping, |
472 | struct list_head *pages, unsigned nr_pages) | 734 | struct list_head *pages, unsigned nr_pages) |
473 | { | 735 | { |
474 | return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); | 736 | struct inode *inode = file->f_mapping->host; |
737 | |||
738 | /* If the file has inline data, skip readpages */ | ||
739 | if (f2fs_has_inline_data(inode)) | ||
740 | return 0; | ||
741 | |||
742 | return mpage_readpages(mapping, pages, nr_pages, get_data_block); | ||
475 | } | 743 | } |
476 | 744 | ||
477 | int do_write_data_page(struct page *page) | 745 | int do_write_data_page(struct page *page, struct f2fs_io_info *fio) |
478 | { | 746 | { |
479 | struct inode *inode = page->mapping->host; | 747 | struct inode *inode = page->mapping->host; |
480 | block_t old_blk_addr, new_blk_addr; | 748 | block_t old_blkaddr, new_blkaddr; |
481 | struct dnode_of_data dn; | 749 | struct dnode_of_data dn; |
482 | int err = 0; | 750 | int err = 0; |
483 | 751 | ||
@@ -486,10 +754,10 @@ int do_write_data_page(struct page *page) | |||
486 | if (err) | 754 | if (err) |
487 | return err; | 755 | return err; |
488 | 756 | ||
489 | old_blk_addr = dn.data_blkaddr; | 757 | old_blkaddr = dn.data_blkaddr; |
490 | 758 | ||
491 | /* This page is already truncated */ | 759 | /* This page is already truncated */ |
492 | if (old_blk_addr == NULL_ADDR) | 760 | if (old_blkaddr == NULL_ADDR) |
493 | goto out_writepage; | 761 | goto out_writepage; |
494 | 762 | ||
495 | set_page_writeback(page); | 763 | set_page_writeback(page); |
@@ -498,15 +766,13 @@ int do_write_data_page(struct page *page) | |||
498 | * If current allocation needs SSR, | 766 | * If current allocation needs SSR, |
499 | * it had better in-place writes for updated data. | 767 | * it had better in-place writes for updated data. |
500 | */ | 768 | */ |
501 | if (unlikely(old_blk_addr != NEW_ADDR && | 769 | if (unlikely(old_blkaddr != NEW_ADDR && |
502 | !is_cold_data(page) && | 770 | !is_cold_data(page) && |
503 | need_inplace_update(inode))) { | 771 | need_inplace_update(inode))) { |
504 | rewrite_data_page(F2FS_SB(inode->i_sb), page, | 772 | rewrite_data_page(page, old_blkaddr, fio); |
505 | old_blk_addr); | ||
506 | } else { | 773 | } else { |
507 | write_data_page(inode, page, &dn, | 774 | write_data_page(page, &dn, &new_blkaddr, fio); |
508 | old_blk_addr, &new_blk_addr); | 775 | update_extent_cache(new_blkaddr, &dn); |
509 | update_extent_cache(new_blk_addr, &dn); | ||
510 | } | 776 | } |
511 | out_writepage: | 777 | out_writepage: |
512 | f2fs_put_dnode(&dn); | 778 | f2fs_put_dnode(&dn); |
@@ -521,9 +787,13 @@ static int f2fs_write_data_page(struct page *page, | |||
521 | loff_t i_size = i_size_read(inode); | 787 | loff_t i_size = i_size_read(inode); |
522 | const pgoff_t end_index = ((unsigned long long) i_size) | 788 | const pgoff_t end_index = ((unsigned long long) i_size) |
523 | >> PAGE_CACHE_SHIFT; | 789 | >> PAGE_CACHE_SHIFT; |
524 | unsigned offset; | 790 | unsigned offset = 0; |
525 | bool need_balance_fs = false; | 791 | bool need_balance_fs = false; |
526 | int err = 0; | 792 | int err = 0; |
793 | struct f2fs_io_info fio = { | ||
794 | .type = DATA, | ||
795 | .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, | ||
796 | }; | ||
527 | 797 | ||
528 | if (page->index < end_index) | 798 | if (page->index < end_index) |
529 | goto write; | 799 | goto write; |
@@ -543,7 +813,7 @@ static int f2fs_write_data_page(struct page *page, | |||
543 | 813 | ||
544 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 814 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
545 | write: | 815 | write: |
546 | if (sbi->por_doing) { | 816 | if (unlikely(sbi->por_doing)) { |
547 | err = AOP_WRITEPAGE_ACTIVATE; | 817 | err = AOP_WRITEPAGE_ACTIVATE; |
548 | goto redirty_out; | 818 | goto redirty_out; |
549 | } | 819 | } |
@@ -552,10 +822,18 @@ write: | |||
552 | if (S_ISDIR(inode->i_mode)) { | 822 | if (S_ISDIR(inode->i_mode)) { |
553 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | 823 | dec_page_count(sbi, F2FS_DIRTY_DENTS); |
554 | inode_dec_dirty_dents(inode); | 824 | inode_dec_dirty_dents(inode); |
555 | err = do_write_data_page(page); | 825 | err = do_write_data_page(page, &fio); |
556 | } else { | 826 | } else { |
557 | f2fs_lock_op(sbi); | 827 | f2fs_lock_op(sbi); |
558 | err = do_write_data_page(page); | 828 | |
829 | if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) { | ||
830 | err = f2fs_write_inline_data(inode, page, offset); | ||
831 | f2fs_unlock_op(sbi); | ||
832 | goto out; | ||
833 | } else { | ||
834 | err = do_write_data_page(page, &fio); | ||
835 | } | ||
836 | |||
559 | f2fs_unlock_op(sbi); | 837 | f2fs_unlock_op(sbi); |
560 | need_balance_fs = true; | 838 | need_balance_fs = true; |
561 | } | 839 | } |
@@ -564,8 +842,10 @@ write: | |||
564 | else if (err) | 842 | else if (err) |
565 | goto redirty_out; | 843 | goto redirty_out; |
566 | 844 | ||
567 | if (wbc->for_reclaim) | 845 | if (wbc->for_reclaim) { |
568 | f2fs_submit_bio(sbi, DATA, true); | 846 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
847 | need_balance_fs = false; | ||
848 | } | ||
569 | 849 | ||
570 | clear_cold_data(page); | 850 | clear_cold_data(page); |
571 | out: | 851 | out: |
@@ -617,7 +897,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
617 | ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); | 897 | ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); |
618 | if (locked) | 898 | if (locked) |
619 | mutex_unlock(&sbi->writepages); | 899 | mutex_unlock(&sbi->writepages); |
620 | f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); | 900 | |
901 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
621 | 902 | ||
622 | remove_dirty_dir_inode(inode); | 903 | remove_dirty_dir_inode(inode); |
623 | 904 | ||
@@ -638,27 +919,28 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, | |||
638 | 919 | ||
639 | f2fs_balance_fs(sbi); | 920 | f2fs_balance_fs(sbi); |
640 | repeat: | 921 | repeat: |
922 | err = f2fs_convert_inline_data(inode, pos + len); | ||
923 | if (err) | ||
924 | return err; | ||
925 | |||
641 | page = grab_cache_page_write_begin(mapping, index, flags); | 926 | page = grab_cache_page_write_begin(mapping, index, flags); |
642 | if (!page) | 927 | if (!page) |
643 | return -ENOMEM; | 928 | return -ENOMEM; |
644 | *pagep = page; | 929 | *pagep = page; |
645 | 930 | ||
646 | f2fs_lock_op(sbi); | 931 | if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) |
932 | goto inline_data; | ||
647 | 933 | ||
934 | f2fs_lock_op(sbi); | ||
648 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 935 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
649 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); | 936 | err = f2fs_reserve_block(&dn, index); |
650 | if (err) | ||
651 | goto err; | ||
652 | |||
653 | if (dn.data_blkaddr == NULL_ADDR) | ||
654 | err = reserve_new_block(&dn); | ||
655 | |||
656 | f2fs_put_dnode(&dn); | ||
657 | if (err) | ||
658 | goto err; | ||
659 | |||
660 | f2fs_unlock_op(sbi); | 937 | f2fs_unlock_op(sbi); |
661 | 938 | ||
939 | if (err) { | ||
940 | f2fs_put_page(page, 1); | ||
941 | return err; | ||
942 | } | ||
943 | inline_data: | ||
662 | if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) | 944 | if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) |
663 | return 0; | 945 | return 0; |
664 | 946 | ||
@@ -674,15 +956,19 @@ repeat: | |||
674 | if (dn.data_blkaddr == NEW_ADDR) { | 956 | if (dn.data_blkaddr == NEW_ADDR) { |
675 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 957 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
676 | } else { | 958 | } else { |
677 | err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); | 959 | if (f2fs_has_inline_data(inode)) |
960 | err = f2fs_read_inline_data(inode, page); | ||
961 | else | ||
962 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, | ||
963 | READ_SYNC); | ||
678 | if (err) | 964 | if (err) |
679 | return err; | 965 | return err; |
680 | lock_page(page); | 966 | lock_page(page); |
681 | if (!PageUptodate(page)) { | 967 | if (unlikely(!PageUptodate(page))) { |
682 | f2fs_put_page(page, 1); | 968 | f2fs_put_page(page, 1); |
683 | return -EIO; | 969 | return -EIO; |
684 | } | 970 | } |
685 | if (page->mapping != mapping) { | 971 | if (unlikely(page->mapping != mapping)) { |
686 | f2fs_put_page(page, 1); | 972 | f2fs_put_page(page, 1); |
687 | goto repeat; | 973 | goto repeat; |
688 | } | 974 | } |
@@ -691,11 +977,6 @@ out: | |||
691 | SetPageUptodate(page); | 977 | SetPageUptodate(page); |
692 | clear_cold_data(page); | 978 | clear_cold_data(page); |
693 | return 0; | 979 | return 0; |
694 | |||
695 | err: | ||
696 | f2fs_unlock_op(sbi); | ||
697 | f2fs_put_page(page, 1); | ||
698 | return err; | ||
699 | } | 980 | } |
700 | 981 | ||
701 | static int f2fs_write_end(struct file *file, | 982 | static int f2fs_write_end(struct file *file, |
@@ -714,23 +995,43 @@ static int f2fs_write_end(struct file *file, | |||
714 | update_inode_page(inode); | 995 | update_inode_page(inode); |
715 | } | 996 | } |
716 | 997 | ||
717 | unlock_page(page); | 998 | f2fs_put_page(page, 1); |
718 | page_cache_release(page); | ||
719 | return copied; | 999 | return copied; |
720 | } | 1000 | } |
721 | 1001 | ||
1002 | static int check_direct_IO(struct inode *inode, int rw, | ||
1003 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | ||
1004 | { | ||
1005 | unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; | ||
1006 | int i; | ||
1007 | |||
1008 | if (rw == READ) | ||
1009 | return 0; | ||
1010 | |||
1011 | if (offset & blocksize_mask) | ||
1012 | return -EINVAL; | ||
1013 | |||
1014 | for (i = 0; i < nr_segs; i++) | ||
1015 | if (iov[i].iov_len & blocksize_mask) | ||
1016 | return -EINVAL; | ||
1017 | return 0; | ||
1018 | } | ||
1019 | |||
722 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | 1020 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, |
723 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 1021 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) |
724 | { | 1022 | { |
725 | struct file *file = iocb->ki_filp; | 1023 | struct file *file = iocb->ki_filp; |
726 | struct inode *inode = file->f_mapping->host; | 1024 | struct inode *inode = file->f_mapping->host; |
727 | 1025 | ||
728 | if (rw == WRITE) | 1026 | /* Let buffer I/O handle the inline data case. */ |
1027 | if (f2fs_has_inline_data(inode)) | ||
1028 | return 0; | ||
1029 | |||
1030 | if (check_direct_IO(inode, rw, iov, offset, nr_segs)) | ||
729 | return 0; | 1031 | return 0; |
730 | 1032 | ||
731 | /* Needs synchronization with the cleaner */ | ||
732 | return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 1033 | return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, |
733 | get_data_block_ro); | 1034 | get_data_block); |
734 | } | 1035 | } |
735 | 1036 | ||
736 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | 1037 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, |
@@ -759,6 +1060,8 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
759 | trace_f2fs_set_page_dirty(page, DATA); | 1060 | trace_f2fs_set_page_dirty(page, DATA); |
760 | 1061 | ||
761 | SetPageUptodate(page); | 1062 | SetPageUptodate(page); |
1063 | mark_inode_dirty(inode); | ||
1064 | |||
762 | if (!PageDirty(page)) { | 1065 | if (!PageDirty(page)) { |
763 | __set_page_dirty_nobuffers(page); | 1066 | __set_page_dirty_nobuffers(page); |
764 | set_dirty_dir_page(inode, page); | 1067 | set_dirty_dir_page(inode, page); |
@@ -769,7 +1072,7 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
769 | 1072 | ||
770 | static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) | 1073 | static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) |
771 | { | 1074 | { |
772 | return generic_block_bmap(mapping, block, get_data_block_ro); | 1075 | return generic_block_bmap(mapping, block, get_data_block); |
773 | } | 1076 | } |
774 | 1077 | ||
775 | const struct address_space_operations f2fs_dblock_aops = { | 1078 | const struct address_space_operations f2fs_dblock_aops = { |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a84b0a8e6854..3de9d20d0c14 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include "gc.h" | 24 | #include "gc.h" |
25 | 25 | ||
26 | static LIST_HEAD(f2fs_stat_list); | 26 | static LIST_HEAD(f2fs_stat_list); |
27 | static struct dentry *debugfs_root; | 27 | static struct dentry *f2fs_debugfs_root; |
28 | static DEFINE_MUTEX(f2fs_stat_mutex); | 28 | static DEFINE_MUTEX(f2fs_stat_mutex); |
29 | 29 | ||
30 | static void update_general_status(struct f2fs_sb_info *sbi) | 30 | static void update_general_status(struct f2fs_sb_info *sbi) |
@@ -45,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
45 | si->valid_count = valid_user_blocks(sbi); | 45 | si->valid_count = valid_user_blocks(sbi); |
46 | si->valid_node_count = valid_node_count(sbi); | 46 | si->valid_node_count = valid_node_count(sbi); |
47 | si->valid_inode_count = valid_inode_count(sbi); | 47 | si->valid_inode_count = valid_inode_count(sbi); |
48 | si->inline_inode = sbi->inline_inode; | ||
48 | si->utilization = utilization(sbi); | 49 | si->utilization = utilization(sbi); |
49 | 50 | ||
50 | si->free_segs = free_segments(sbi); | 51 | si->free_segs = free_segments(sbi); |
51 | si->free_secs = free_sections(sbi); | 52 | si->free_secs = free_sections(sbi); |
52 | si->prefree_count = prefree_segments(sbi); | 53 | si->prefree_count = prefree_segments(sbi); |
53 | si->dirty_count = dirty_segments(sbi); | 54 | si->dirty_count = dirty_segments(sbi); |
54 | si->node_pages = sbi->node_inode->i_mapping->nrpages; | 55 | si->node_pages = NODE_MAPPING(sbi)->nrpages; |
55 | si->meta_pages = sbi->meta_inode->i_mapping->nrpages; | 56 | si->meta_pages = META_MAPPING(sbi)->nrpages; |
56 | si->nats = NM_I(sbi)->nat_cnt; | 57 | si->nats = NM_I(sbi)->nat_cnt; |
57 | si->sits = SIT_I(sbi)->dirty_sentries; | 58 | si->sits = SIT_I(sbi)->dirty_sentries; |
58 | si->fnids = NM_I(sbi)->fcnt; | 59 | si->fnids = NM_I(sbi)->fcnt; |
@@ -165,9 +166,9 @@ get_cache: | |||
165 | /* free nids */ | 166 | /* free nids */ |
166 | si->cache_mem = NM_I(sbi)->fcnt; | 167 | si->cache_mem = NM_I(sbi)->fcnt; |
167 | si->cache_mem += NM_I(sbi)->nat_cnt; | 168 | si->cache_mem += NM_I(sbi)->nat_cnt; |
168 | npages = sbi->node_inode->i_mapping->nrpages; | 169 | npages = NODE_MAPPING(sbi)->nrpages; |
169 | si->cache_mem += npages << PAGE_CACHE_SHIFT; | 170 | si->cache_mem += npages << PAGE_CACHE_SHIFT; |
170 | npages = sbi->meta_inode->i_mapping->nrpages; | 171 | npages = META_MAPPING(sbi)->nrpages; |
171 | si->cache_mem += npages << PAGE_CACHE_SHIFT; | 172 | si->cache_mem += npages << PAGE_CACHE_SHIFT; |
172 | si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); | 173 | si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); |
173 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); | 174 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); |
@@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v) | |||
200 | seq_printf(s, "Other: %u)\n - Data: %u\n", | 201 | seq_printf(s, "Other: %u)\n - Data: %u\n", |
201 | si->valid_node_count - si->valid_inode_count, | 202 | si->valid_node_count - si->valid_inode_count, |
202 | si->valid_count - si->valid_node_count); | 203 | si->valid_count - si->valid_node_count); |
204 | seq_printf(s, " - Inline_data Inode: %u\n", | ||
205 | si->inline_inode); | ||
203 | seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", | 206 | seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", |
204 | si->main_area_segs, si->main_area_sections, | 207 | si->main_area_segs, si->main_area_sections, |
205 | si->main_area_zones); | 208 | si->main_area_zones); |
@@ -242,14 +245,14 @@ static int stat_show(struct seq_file *s, void *v) | |||
242 | seq_printf(s, " - node blocks : %d\n", si->node_blks); | 245 | seq_printf(s, " - node blocks : %d\n", si->node_blks); |
243 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", | 246 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", |
244 | si->hit_ext, si->total_ext); | 247 | si->hit_ext, si->total_ext); |
245 | seq_printf(s, "\nBalancing F2FS Async:\n"); | 248 | seq_puts(s, "\nBalancing F2FS Async:\n"); |
246 | seq_printf(s, " - nodes %4d in %4d\n", | 249 | seq_printf(s, " - nodes: %4d in %4d\n", |
247 | si->ndirty_node, si->node_pages); | 250 | si->ndirty_node, si->node_pages); |
248 | seq_printf(s, " - dents %4d in dirs:%4d\n", | 251 | seq_printf(s, " - dents: %4d in dirs:%4d\n", |
249 | si->ndirty_dent, si->ndirty_dirs); | 252 | si->ndirty_dent, si->ndirty_dirs); |
250 | seq_printf(s, " - meta %4d in %4d\n", | 253 | seq_printf(s, " - meta: %4d in %4d\n", |
251 | si->ndirty_meta, si->meta_pages); | 254 | si->ndirty_meta, si->meta_pages); |
252 | seq_printf(s, " - NATs %5d > %lu\n", | 255 | seq_printf(s, " - NATs: %5d > %lu\n", |
253 | si->nats, NM_WOUT_THRESHOLD); | 256 | si->nats, NM_WOUT_THRESHOLD); |
254 | seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", | 257 | seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", |
255 | si->sits, si->fnids); | 258 | si->sits, si->fnids); |
@@ -340,14 +343,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) | |||
340 | 343 | ||
341 | void __init f2fs_create_root_stats(void) | 344 | void __init f2fs_create_root_stats(void) |
342 | { | 345 | { |
343 | debugfs_root = debugfs_create_dir("f2fs", NULL); | 346 | struct dentry *file; |
344 | if (debugfs_root) | 347 | |
345 | debugfs_create_file("status", S_IRUGO, debugfs_root, | 348 | f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); |
346 | NULL, &stat_fops); | 349 | if (!f2fs_debugfs_root) |
350 | goto bail; | ||
351 | |||
352 | file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, | ||
353 | NULL, &stat_fops); | ||
354 | if (!file) | ||
355 | goto free_debugfs_dir; | ||
356 | |||
357 | return; | ||
358 | |||
359 | free_debugfs_dir: | ||
360 | debugfs_remove(f2fs_debugfs_root); | ||
361 | |||
362 | bail: | ||
363 | f2fs_debugfs_root = NULL; | ||
364 | return; | ||
347 | } | 365 | } |
348 | 366 | ||
349 | void f2fs_destroy_root_stats(void) | 367 | void f2fs_destroy_root_stats(void) |
350 | { | 368 | { |
351 | debugfs_remove_recursive(debugfs_root); | 369 | if (!f2fs_debugfs_root) |
352 | debugfs_root = NULL; | 370 | return; |
371 | |||
372 | debugfs_remove_recursive(f2fs_debugfs_root); | ||
373 | f2fs_debugfs_root = NULL; | ||
353 | } | 374 | } |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 594fc1bb64ef..2b7c255bcbdf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -190,9 +190,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, | |||
190 | unsigned int max_depth; | 190 | unsigned int max_depth; |
191 | unsigned int level; | 191 | unsigned int level; |
192 | 192 | ||
193 | if (namelen > F2FS_NAME_LEN) | ||
194 | return NULL; | ||
195 | |||
196 | if (npages == 0) | 193 | if (npages == 0) |
197 | return NULL; | 194 | return NULL; |
198 | 195 | ||
@@ -259,20 +256,17 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
259 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 256 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
260 | mark_inode_dirty(dir); | 257 | mark_inode_dirty(dir); |
261 | 258 | ||
262 | /* update parent inode number before releasing dentry page */ | ||
263 | F2FS_I(inode)->i_pino = dir->i_ino; | ||
264 | |||
265 | f2fs_put_page(page, 1); | 259 | f2fs_put_page(page, 1); |
266 | } | 260 | } |
267 | 261 | ||
268 | static void init_dent_inode(const struct qstr *name, struct page *ipage) | 262 | static void init_dent_inode(const struct qstr *name, struct page *ipage) |
269 | { | 263 | { |
270 | struct f2fs_node *rn; | 264 | struct f2fs_inode *ri; |
271 | 265 | ||
272 | /* copy name info. to this inode page */ | 266 | /* copy name info. to this inode page */ |
273 | rn = F2FS_NODE(ipage); | 267 | ri = F2FS_INODE(ipage); |
274 | rn->i.i_namelen = cpu_to_le32(name->len); | 268 | ri->i_namelen = cpu_to_le32(name->len); |
275 | memcpy(rn->i.i_name, name->name, name->len); | 269 | memcpy(ri->i_name, name->name, name->len); |
276 | set_page_dirty(ipage); | 270 | set_page_dirty(ipage); |
277 | } | 271 | } |
278 | 272 | ||
@@ -348,11 +342,11 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
348 | 342 | ||
349 | err = f2fs_init_acl(inode, dir, page); | 343 | err = f2fs_init_acl(inode, dir, page); |
350 | if (err) | 344 | if (err) |
351 | goto error; | 345 | goto put_error; |
352 | 346 | ||
353 | err = f2fs_init_security(inode, dir, name, page); | 347 | err = f2fs_init_security(inode, dir, name, page); |
354 | if (err) | 348 | if (err) |
355 | goto error; | 349 | goto put_error; |
356 | 350 | ||
357 | wait_on_page_writeback(page); | 351 | wait_on_page_writeback(page); |
358 | } else { | 352 | } else { |
@@ -376,8 +370,9 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
376 | } | 370 | } |
377 | return page; | 371 | return page; |
378 | 372 | ||
379 | error: | 373 | put_error: |
380 | f2fs_put_page(page, 1); | 374 | f2fs_put_page(page, 1); |
375 | error: | ||
381 | remove_inode_page(inode); | 376 | remove_inode_page(inode); |
382 | return ERR_PTR(err); | 377 | return ERR_PTR(err); |
383 | } | 378 | } |
@@ -393,6 +388,8 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, | |||
393 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); | 388 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); |
394 | } | 389 | } |
395 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 390 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
391 | mark_inode_dirty(dir); | ||
392 | |||
396 | if (F2FS_I(dir)->i_current_depth != current_depth) { | 393 | if (F2FS_I(dir)->i_current_depth != current_depth) { |
397 | F2FS_I(dir)->i_current_depth = current_depth; | 394 | F2FS_I(dir)->i_current_depth = current_depth; |
398 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | 395 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
@@ -400,8 +397,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, | |||
400 | 397 | ||
401 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) | 398 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) |
402 | update_inode_page(dir); | 399 | update_inode_page(dir); |
403 | else | ||
404 | mark_inode_dirty(dir); | ||
405 | 400 | ||
406 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) | 401 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) |
407 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 402 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
@@ -432,10 +427,11 @@ next: | |||
432 | } | 427 | } |
433 | 428 | ||
434 | /* | 429 | /* |
435 | * Caller should grab and release a mutex by calling mutex_lock_op() and | 430 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and |
436 | * mutex_unlock_op(). | 431 | * f2fs_unlock_op(). |
437 | */ | 432 | */ |
438 | int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) | 433 | int __f2fs_add_link(struct inode *dir, const struct qstr *name, |
434 | struct inode *inode) | ||
439 | { | 435 | { |
440 | unsigned int bit_pos; | 436 | unsigned int bit_pos; |
441 | unsigned int level; | 437 | unsigned int level; |
@@ -461,7 +457,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in | |||
461 | } | 457 | } |
462 | 458 | ||
463 | start: | 459 | start: |
464 | if (current_depth == MAX_DIR_HASH_DEPTH) | 460 | if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) |
465 | return -ENOSPC; | 461 | return -ENOSPC; |
466 | 462 | ||
467 | /* Increase the depth, if required */ | 463 | /* Increase the depth, if required */ |
@@ -554,14 +550,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
554 | 550 | ||
555 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 551 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
556 | 552 | ||
557 | if (inode && S_ISDIR(inode->i_mode)) { | ||
558 | drop_nlink(dir); | ||
559 | update_inode_page(dir); | ||
560 | } else { | ||
561 | mark_inode_dirty(dir); | ||
562 | } | ||
563 | |||
564 | if (inode) { | 553 | if (inode) { |
554 | if (S_ISDIR(inode->i_mode)) { | ||
555 | drop_nlink(dir); | ||
556 | update_inode_page(dir); | ||
557 | } | ||
565 | inode->i_ctime = CURRENT_TIME; | 558 | inode->i_ctime = CURRENT_TIME; |
566 | drop_nlink(inode); | 559 | drop_nlink(inode); |
567 | if (S_ISDIR(inode->i_mode)) { | 560 | if (S_ISDIR(inode->i_mode)) { |
@@ -636,7 +629,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) | |||
636 | 629 | ||
637 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); | 630 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); |
638 | 631 | ||
639 | for ( ; n < npages; n++) { | 632 | for (; n < npages; n++) { |
640 | dentry_page = get_lock_data_page(inode, n); | 633 | dentry_page = get_lock_data_page(inode, n); |
641 | if (IS_ERR(dentry_page)) | 634 | if (IS_ERR(dentry_page)) |
642 | continue; | 635 | continue; |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 89dc7508faf2..af51a0bd2dee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -22,8 +22,10 @@ | |||
22 | 22 | ||
23 | #ifdef CONFIG_F2FS_CHECK_FS | 23 | #ifdef CONFIG_F2FS_CHECK_FS |
24 | #define f2fs_bug_on(condition) BUG_ON(condition) | 24 | #define f2fs_bug_on(condition) BUG_ON(condition) |
25 | #define f2fs_down_write(x, y) down_write_nest_lock(x, y) | ||
25 | #else | 26 | #else |
26 | #define f2fs_bug_on(condition) | 27 | #define f2fs_bug_on(condition) |
28 | #define f2fs_down_write(x, y) down_write(x) | ||
27 | #endif | 29 | #endif |
28 | 30 | ||
29 | /* | 31 | /* |
@@ -37,6 +39,7 @@ | |||
37 | #define F2FS_MOUNT_POSIX_ACL 0x00000020 | 39 | #define F2FS_MOUNT_POSIX_ACL 0x00000020 |
38 | #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 | 40 | #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 |
39 | #define F2FS_MOUNT_INLINE_XATTR 0x00000080 | 41 | #define F2FS_MOUNT_INLINE_XATTR 0x00000080 |
42 | #define F2FS_MOUNT_INLINE_DATA 0x00000100 | ||
40 | 43 | ||
41 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) | 44 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) |
42 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) | 45 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) |
@@ -97,6 +100,13 @@ struct dir_inode_entry { | |||
97 | struct inode *inode; /* vfs inode pointer */ | 100 | struct inode *inode; /* vfs inode pointer */ |
98 | }; | 101 | }; |
99 | 102 | ||
103 | /* for the list of blockaddresses to be discarded */ | ||
104 | struct discard_entry { | ||
105 | struct list_head list; /* list head */ | ||
106 | block_t blkaddr; /* block address to be discarded */ | ||
107 | int len; /* # of consecutive blocks of the discard */ | ||
108 | }; | ||
109 | |||
100 | /* for the list of fsync inodes, used only during recovery */ | 110 | /* for the list of fsync inodes, used only during recovery */ |
101 | struct fsync_inode_entry { | 111 | struct fsync_inode_entry { |
102 | struct list_head list; /* list head */ | 112 | struct list_head list; /* list head */ |
@@ -155,13 +165,15 @@ enum { | |||
155 | LOOKUP_NODE, /* look up a node without readahead */ | 165 | LOOKUP_NODE, /* look up a node without readahead */ |
156 | LOOKUP_NODE_RA, /* | 166 | LOOKUP_NODE_RA, /* |
157 | * look up a node with readahead called | 167 | * look up a node with readahead called |
158 | * by get_datablock_ro. | 168 | * by get_data_block. |
159 | */ | 169 | */ |
160 | }; | 170 | }; |
161 | 171 | ||
162 | #define F2FS_LINK_MAX 32000 /* maximum link count per file */ | 172 | #define F2FS_LINK_MAX 32000 /* maximum link count per file */ |
163 | 173 | ||
164 | /* for in-memory extent cache entry */ | 174 | /* for in-memory extent cache entry */ |
175 | #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ | ||
176 | |||
165 | struct extent_info { | 177 | struct extent_info { |
166 | rwlock_t ext_lock; /* rwlock for consistency */ | 178 | rwlock_t ext_lock; /* rwlock for consistency */ |
167 | unsigned int fofs; /* start offset in a file */ | 179 | unsigned int fofs; /* start offset in a file */ |
@@ -308,6 +320,14 @@ struct f2fs_sm_info { | |||
308 | 320 | ||
309 | /* a threshold to reclaim prefree segments */ | 321 | /* a threshold to reclaim prefree segments */ |
310 | unsigned int rec_prefree_segments; | 322 | unsigned int rec_prefree_segments; |
323 | |||
324 | /* for small discard management */ | ||
325 | struct list_head discard_list; /* 4KB discard list */ | ||
326 | int nr_discards; /* # of discards in the list */ | ||
327 | int max_discards; /* max. discards to be issued */ | ||
328 | |||
329 | unsigned int ipu_policy; /* in-place-update policy */ | ||
330 | unsigned int min_ipu_util; /* in-place-update threshold */ | ||
311 | }; | 331 | }; |
312 | 332 | ||
313 | /* | 333 | /* |
@@ -338,6 +358,7 @@ enum count_type { | |||
338 | * with waiting the bio's completion | 358 | * with waiting the bio's completion |
339 | * ... Only can be used with META. | 359 | * ... Only can be used with META. |
340 | */ | 360 | */ |
361 | #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) | ||
341 | enum page_type { | 362 | enum page_type { |
342 | DATA, | 363 | DATA, |
343 | NODE, | 364 | NODE, |
@@ -346,6 +367,20 @@ enum page_type { | |||
346 | META_FLUSH, | 367 | META_FLUSH, |
347 | }; | 368 | }; |
348 | 369 | ||
370 | struct f2fs_io_info { | ||
371 | enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ | ||
372 | int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ | ||
373 | }; | ||
374 | |||
375 | #define is_read_io(rw) (((rw) & 1) == READ) | ||
376 | struct f2fs_bio_info { | ||
377 | struct f2fs_sb_info *sbi; /* f2fs superblock */ | ||
378 | struct bio *bio; /* bios to merge */ | ||
379 | sector_t last_block_in_bio; /* last block number */ | ||
380 | struct f2fs_io_info fio; /* store buffered io info. */ | ||
381 | struct mutex io_mutex; /* mutex for bio */ | ||
382 | }; | ||
383 | |||
349 | struct f2fs_sb_info { | 384 | struct f2fs_sb_info { |
350 | struct super_block *sb; /* pointer to VFS super block */ | 385 | struct super_block *sb; /* pointer to VFS super block */ |
351 | struct proc_dir_entry *s_proc; /* proc entry */ | 386 | struct proc_dir_entry *s_proc; /* proc entry */ |
@@ -359,9 +394,10 @@ struct f2fs_sb_info { | |||
359 | 394 | ||
360 | /* for segment-related operations */ | 395 | /* for segment-related operations */ |
361 | struct f2fs_sm_info *sm_info; /* segment manager */ | 396 | struct f2fs_sm_info *sm_info; /* segment manager */ |
362 | struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ | 397 | |
363 | sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ | 398 | /* for bio operations */ |
364 | struct rw_semaphore bio_sem; /* IO semaphore */ | 399 | struct f2fs_bio_info read_io; /* for read bios */ |
400 | struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ | ||
365 | 401 | ||
366 | /* for checkpoint */ | 402 | /* for checkpoint */ |
367 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ | 403 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ |
@@ -376,8 +412,9 @@ struct f2fs_sb_info { | |||
376 | 412 | ||
377 | /* for orphan inode management */ | 413 | /* for orphan inode management */ |
378 | struct list_head orphan_inode_list; /* orphan inode list */ | 414 | struct list_head orphan_inode_list; /* orphan inode list */ |
379 | struct mutex orphan_inode_mutex; /* for orphan inode list */ | 415 | spinlock_t orphan_inode_lock; /* for orphan inode list */ |
380 | unsigned int n_orphans; /* # of orphan inodes */ | 416 | unsigned int n_orphans; /* # of orphan inodes */ |
417 | unsigned int max_orphans; /* max orphan inodes */ | ||
381 | 418 | ||
382 | /* for directory inode management */ | 419 | /* for directory inode management */ |
383 | struct list_head dir_inode_list; /* dir inode list */ | 420 | struct list_head dir_inode_list; /* dir inode list */ |
@@ -414,6 +451,9 @@ struct f2fs_sb_info { | |||
414 | struct f2fs_gc_kthread *gc_thread; /* GC thread */ | 451 | struct f2fs_gc_kthread *gc_thread; /* GC thread */ |
415 | unsigned int cur_victim_sec; /* current victim section num */ | 452 | unsigned int cur_victim_sec; /* current victim section num */ |
416 | 453 | ||
454 | /* maximum # of trials to find a victim segment for SSR and GC */ | ||
455 | unsigned int max_victim_search; | ||
456 | |||
417 | /* | 457 | /* |
418 | * for stat information. | 458 | * for stat information. |
419 | * one is for the LFS mode, and the other is for the SSR mode. | 459 | * one is for the LFS mode, and the other is for the SSR mode. |
@@ -423,6 +463,7 @@ struct f2fs_sb_info { | |||
423 | unsigned int segment_count[2]; /* # of allocated segments */ | 463 | unsigned int segment_count[2]; /* # of allocated segments */ |
424 | unsigned int block_count[2]; /* # of allocated blocks */ | 464 | unsigned int block_count[2]; /* # of allocated blocks */ |
425 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | 465 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ |
466 | int inline_inode; /* # of inline_data inodes */ | ||
426 | int bg_gc; /* background gc calls */ | 467 | int bg_gc; /* background gc calls */ |
427 | unsigned int n_dirty_dirs; /* # of dir inodes */ | 468 | unsigned int n_dirty_dirs; /* # of dir inodes */ |
428 | #endif | 469 | #endif |
@@ -462,6 +503,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page) | |||
462 | return (struct f2fs_node *)page_address(page); | 503 | return (struct f2fs_node *)page_address(page); |
463 | } | 504 | } |
464 | 505 | ||
506 | static inline struct f2fs_inode *F2FS_INODE(struct page *page) | ||
507 | { | ||
508 | return &((struct f2fs_node *)page_address(page))->i; | ||
509 | } | ||
510 | |||
465 | static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) | 511 | static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) |
466 | { | 512 | { |
467 | return (struct f2fs_nm_info *)(sbi->nm_info); | 513 | return (struct f2fs_nm_info *)(sbi->nm_info); |
@@ -487,6 +533,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) | |||
487 | return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); | 533 | return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); |
488 | } | 534 | } |
489 | 535 | ||
536 | static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi) | ||
537 | { | ||
538 | return sbi->meta_inode->i_mapping; | ||
539 | } | ||
540 | |||
541 | static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) | ||
542 | { | ||
543 | return sbi->node_inode->i_mapping; | ||
544 | } | ||
545 | |||
490 | static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) | 546 | static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) |
491 | { | 547 | { |
492 | sbi->s_dirty = 1; | 548 | sbi->s_dirty = 1; |
@@ -534,7 +590,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) | |||
534 | 590 | ||
535 | static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) | 591 | static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) |
536 | { | 592 | { |
537 | down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex); | 593 | f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex); |
538 | } | 594 | } |
539 | 595 | ||
540 | static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) | 596 | static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) |
@@ -548,7 +604,7 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) | |||
548 | static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) | 604 | static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) |
549 | { | 605 | { |
550 | WARN_ON((nid >= NM_I(sbi)->max_nid)); | 606 | WARN_ON((nid >= NM_I(sbi)->max_nid)); |
551 | if (nid >= NM_I(sbi)->max_nid) | 607 | if (unlikely(nid >= NM_I(sbi)->max_nid)) |
552 | return -EINVAL; | 608 | return -EINVAL; |
553 | return 0; | 609 | return 0; |
554 | } | 610 | } |
@@ -561,9 +617,9 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) | |||
561 | static inline int F2FS_HAS_BLOCKS(struct inode *inode) | 617 | static inline int F2FS_HAS_BLOCKS(struct inode *inode) |
562 | { | 618 | { |
563 | if (F2FS_I(inode)->i_xattr_nid) | 619 | if (F2FS_I(inode)->i_xattr_nid) |
564 | return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); | 620 | return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1; |
565 | else | 621 | else |
566 | return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); | 622 | return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; |
567 | } | 623 | } |
568 | 624 | ||
569 | static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, | 625 | static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, |
@@ -574,7 +630,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, | |||
574 | spin_lock(&sbi->stat_lock); | 630 | spin_lock(&sbi->stat_lock); |
575 | valid_block_count = | 631 | valid_block_count = |
576 | sbi->total_valid_block_count + (block_t)count; | 632 | sbi->total_valid_block_count + (block_t)count; |
577 | if (valid_block_count > sbi->user_block_count) { | 633 | if (unlikely(valid_block_count > sbi->user_block_count)) { |
578 | spin_unlock(&sbi->stat_lock); | 634 | spin_unlock(&sbi->stat_lock); |
579 | return false; | 635 | return false; |
580 | } | 636 | } |
@@ -585,7 +641,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, | |||
585 | return true; | 641 | return true; |
586 | } | 642 | } |
587 | 643 | ||
588 | static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, | 644 | static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, |
589 | struct inode *inode, | 645 | struct inode *inode, |
590 | blkcnt_t count) | 646 | blkcnt_t count) |
591 | { | 647 | { |
@@ -595,7 +651,6 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, | |||
595 | inode->i_blocks -= count; | 651 | inode->i_blocks -= count; |
596 | sbi->total_valid_block_count -= (block_t)count; | 652 | sbi->total_valid_block_count -= (block_t)count; |
597 | spin_unlock(&sbi->stat_lock); | 653 | spin_unlock(&sbi->stat_lock); |
598 | return 0; | ||
599 | } | 654 | } |
600 | 655 | ||
601 | static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | 656 | static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) |
@@ -686,50 +741,48 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) | |||
686 | } | 741 | } |
687 | 742 | ||
688 | static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, | 743 | static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, |
689 | struct inode *inode, | 744 | struct inode *inode) |
690 | unsigned int count) | ||
691 | { | 745 | { |
692 | block_t valid_block_count; | 746 | block_t valid_block_count; |
693 | unsigned int valid_node_count; | 747 | unsigned int valid_node_count; |
694 | 748 | ||
695 | spin_lock(&sbi->stat_lock); | 749 | spin_lock(&sbi->stat_lock); |
696 | 750 | ||
697 | valid_block_count = sbi->total_valid_block_count + (block_t)count; | 751 | valid_block_count = sbi->total_valid_block_count + 1; |
698 | sbi->alloc_valid_block_count += (block_t)count; | 752 | if (unlikely(valid_block_count > sbi->user_block_count)) { |
699 | valid_node_count = sbi->total_valid_node_count + count; | ||
700 | |||
701 | if (valid_block_count > sbi->user_block_count) { | ||
702 | spin_unlock(&sbi->stat_lock); | 753 | spin_unlock(&sbi->stat_lock); |
703 | return false; | 754 | return false; |
704 | } | 755 | } |
705 | 756 | ||
706 | if (valid_node_count > sbi->total_node_count) { | 757 | valid_node_count = sbi->total_valid_node_count + 1; |
758 | if (unlikely(valid_node_count > sbi->total_node_count)) { | ||
707 | spin_unlock(&sbi->stat_lock); | 759 | spin_unlock(&sbi->stat_lock); |
708 | return false; | 760 | return false; |
709 | } | 761 | } |
710 | 762 | ||
711 | if (inode) | 763 | if (inode) |
712 | inode->i_blocks += count; | 764 | inode->i_blocks++; |
713 | sbi->total_valid_node_count = valid_node_count; | 765 | |
714 | sbi->total_valid_block_count = valid_block_count; | 766 | sbi->alloc_valid_block_count++; |
767 | sbi->total_valid_node_count++; | ||
768 | sbi->total_valid_block_count++; | ||
715 | spin_unlock(&sbi->stat_lock); | 769 | spin_unlock(&sbi->stat_lock); |
716 | 770 | ||
717 | return true; | 771 | return true; |
718 | } | 772 | } |
719 | 773 | ||
720 | static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, | 774 | static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, |
721 | struct inode *inode, | 775 | struct inode *inode) |
722 | unsigned int count) | ||
723 | { | 776 | { |
724 | spin_lock(&sbi->stat_lock); | 777 | spin_lock(&sbi->stat_lock); |
725 | 778 | ||
726 | f2fs_bug_on(sbi->total_valid_block_count < count); | 779 | f2fs_bug_on(!sbi->total_valid_block_count); |
727 | f2fs_bug_on(sbi->total_valid_node_count < count); | 780 | f2fs_bug_on(!sbi->total_valid_node_count); |
728 | f2fs_bug_on(inode->i_blocks < count); | 781 | f2fs_bug_on(!inode->i_blocks); |
729 | 782 | ||
730 | inode->i_blocks -= count; | 783 | inode->i_blocks--; |
731 | sbi->total_valid_node_count -= count; | 784 | sbi->total_valid_node_count--; |
732 | sbi->total_valid_block_count -= (block_t)count; | 785 | sbi->total_valid_block_count--; |
733 | 786 | ||
734 | spin_unlock(&sbi->stat_lock); | 787 | spin_unlock(&sbi->stat_lock); |
735 | } | 788 | } |
@@ -751,13 +804,12 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | |||
751 | spin_unlock(&sbi->stat_lock); | 804 | spin_unlock(&sbi->stat_lock); |
752 | } | 805 | } |
753 | 806 | ||
754 | static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) | 807 | static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) |
755 | { | 808 | { |
756 | spin_lock(&sbi->stat_lock); | 809 | spin_lock(&sbi->stat_lock); |
757 | f2fs_bug_on(!sbi->total_valid_inode_count); | 810 | f2fs_bug_on(!sbi->total_valid_inode_count); |
758 | sbi->total_valid_inode_count--; | 811 | sbi->total_valid_inode_count--; |
759 | spin_unlock(&sbi->stat_lock); | 812 | spin_unlock(&sbi->stat_lock); |
760 | return 0; | ||
761 | } | 813 | } |
762 | 814 | ||
763 | static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) | 815 | static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) |
@@ -771,7 +823,7 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) | |||
771 | 823 | ||
772 | static inline void f2fs_put_page(struct page *page, int unlock) | 824 | static inline void f2fs_put_page(struct page *page, int unlock) |
773 | { | 825 | { |
774 | if (!page || IS_ERR(page)) | 826 | if (!page) |
775 | return; | 827 | return; |
776 | 828 | ||
777 | if (unlock) { | 829 | if (unlock) { |
@@ -876,7 +928,9 @@ enum { | |||
876 | FI_NO_ALLOC, /* should not allocate any blocks */ | 928 | FI_NO_ALLOC, /* should not allocate any blocks */ |
877 | FI_UPDATE_DIR, /* should update inode block for consistency */ | 929 | FI_UPDATE_DIR, /* should update inode block for consistency */ |
878 | FI_DELAY_IPUT, /* used for the recovery */ | 930 | FI_DELAY_IPUT, /* used for the recovery */ |
931 | FI_NO_EXTENT, /* not to use the extent cache */ | ||
879 | FI_INLINE_XATTR, /* used for inline xattr */ | 932 | FI_INLINE_XATTR, /* used for inline xattr */ |
933 | FI_INLINE_DATA, /* used for inline data*/ | ||
880 | }; | 934 | }; |
881 | 935 | ||
882 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 936 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -914,6 +968,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, | |||
914 | { | 968 | { |
915 | if (ri->i_inline & F2FS_INLINE_XATTR) | 969 | if (ri->i_inline & F2FS_INLINE_XATTR) |
916 | set_inode_flag(fi, FI_INLINE_XATTR); | 970 | set_inode_flag(fi, FI_INLINE_XATTR); |
971 | if (ri->i_inline & F2FS_INLINE_DATA) | ||
972 | set_inode_flag(fi, FI_INLINE_DATA); | ||
917 | } | 973 | } |
918 | 974 | ||
919 | static inline void set_raw_inline(struct f2fs_inode_info *fi, | 975 | static inline void set_raw_inline(struct f2fs_inode_info *fi, |
@@ -923,6 +979,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, | |||
923 | 979 | ||
924 | if (is_inode_flag_set(fi, FI_INLINE_XATTR)) | 980 | if (is_inode_flag_set(fi, FI_INLINE_XATTR)) |
925 | ri->i_inline |= F2FS_INLINE_XATTR; | 981 | ri->i_inline |= F2FS_INLINE_XATTR; |
982 | if (is_inode_flag_set(fi, FI_INLINE_DATA)) | ||
983 | ri->i_inline |= F2FS_INLINE_DATA; | ||
926 | } | 984 | } |
927 | 985 | ||
928 | static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) | 986 | static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) |
@@ -948,6 +1006,18 @@ static inline int inline_xattr_size(struct inode *inode) | |||
948 | return 0; | 1006 | return 0; |
949 | } | 1007 | } |
950 | 1008 | ||
1009 | static inline int f2fs_has_inline_data(struct inode *inode) | ||
1010 | { | ||
1011 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); | ||
1012 | } | ||
1013 | |||
1014 | static inline void *inline_data_addr(struct page *page) | ||
1015 | { | ||
1016 | struct f2fs_inode *ri; | ||
1017 | ri = (struct f2fs_inode *)page_address(page); | ||
1018 | return (void *)&(ri->i_addr[1]); | ||
1019 | } | ||
1020 | |||
951 | static inline int f2fs_readonly(struct super_block *sb) | 1021 | static inline int f2fs_readonly(struct super_block *sb) |
952 | { | 1022 | { |
953 | return sb->s_flags & MS_RDONLY; | 1023 | return sb->s_flags & MS_RDONLY; |
@@ -958,6 +1028,7 @@ static inline int f2fs_readonly(struct super_block *sb) | |||
958 | */ | 1028 | */ |
959 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); | 1029 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); |
960 | void truncate_data_blocks(struct dnode_of_data *); | 1030 | void truncate_data_blocks(struct dnode_of_data *); |
1031 | int truncate_blocks(struct inode *, u64); | ||
961 | void f2fs_truncate(struct inode *); | 1032 | void f2fs_truncate(struct inode *); |
962 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 1033 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
963 | int f2fs_setattr(struct dentry *, struct iattr *); | 1034 | int f2fs_setattr(struct dentry *, struct iattr *); |
@@ -1027,7 +1098,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | |||
1027 | int truncate_inode_blocks(struct inode *, pgoff_t); | 1098 | int truncate_inode_blocks(struct inode *, pgoff_t); |
1028 | int truncate_xattr_node(struct inode *, struct page *); | 1099 | int truncate_xattr_node(struct inode *, struct page *); |
1029 | int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); | 1100 | int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); |
1030 | int remove_inode_page(struct inode *); | 1101 | void remove_inode_page(struct inode *); |
1031 | struct page *new_inode_page(struct inode *, const struct qstr *); | 1102 | struct page *new_inode_page(struct inode *, const struct qstr *); |
1032 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); | 1103 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); |
1033 | void ra_node_page(struct f2fs_sb_info *, nid_t); | 1104 | void ra_node_page(struct f2fs_sb_info *, nid_t); |
@@ -1059,19 +1130,19 @@ void clear_prefree_segments(struct f2fs_sb_info *); | |||
1059 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1130 | int npages_for_summary_flush(struct f2fs_sb_info *); |
1060 | void allocate_new_segments(struct f2fs_sb_info *); | 1131 | void allocate_new_segments(struct f2fs_sb_info *); |
1061 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | 1132 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); |
1062 | struct bio *f2fs_bio_alloc(struct block_device *, int); | ||
1063 | void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); | ||
1064 | void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); | ||
1065 | void write_meta_page(struct f2fs_sb_info *, struct page *); | 1133 | void write_meta_page(struct f2fs_sb_info *, struct page *); |
1066 | void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, | 1134 | void write_node_page(struct f2fs_sb_info *, struct page *, |
1067 | block_t, block_t *); | 1135 | struct f2fs_io_info *, unsigned int, block_t, block_t *); |
1068 | void write_data_page(struct inode *, struct page *, struct dnode_of_data*, | 1136 | void write_data_page(struct page *, struct dnode_of_data *, block_t *, |
1069 | block_t, block_t *); | 1137 | struct f2fs_io_info *); |
1070 | void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); | 1138 | void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); |
1071 | void recover_data_page(struct f2fs_sb_info *, struct page *, | 1139 | void recover_data_page(struct f2fs_sb_info *, struct page *, |
1072 | struct f2fs_summary *, block_t, block_t); | 1140 | struct f2fs_summary *, block_t, block_t); |
1073 | void rewrite_node_page(struct f2fs_sb_info *, struct page *, | 1141 | void rewrite_node_page(struct f2fs_sb_info *, struct page *, |
1074 | struct f2fs_summary *, block_t, block_t); | 1142 | struct f2fs_summary *, block_t, block_t); |
1143 | void allocate_data_block(struct f2fs_sb_info *, struct page *, | ||
1144 | block_t, block_t *, struct f2fs_summary *, int); | ||
1145 | void f2fs_wait_on_page_writeback(struct page *, enum page_type); | ||
1075 | void write_data_summaries(struct f2fs_sb_info *, block_t); | 1146 | void write_data_summaries(struct f2fs_sb_info *, block_t); |
1076 | void write_node_summaries(struct f2fs_sb_info *, block_t); | 1147 | void write_node_summaries(struct f2fs_sb_info *, block_t); |
1077 | int lookup_journal_in_cursum(struct f2fs_summary_block *, | 1148 | int lookup_journal_in_cursum(struct f2fs_summary_block *, |
@@ -1079,6 +1150,8 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *, | |||
1079 | void flush_sit_entries(struct f2fs_sb_info *); | 1150 | void flush_sit_entries(struct f2fs_sb_info *); |
1080 | int build_segment_manager(struct f2fs_sb_info *); | 1151 | int build_segment_manager(struct f2fs_sb_info *); |
1081 | void destroy_segment_manager(struct f2fs_sb_info *); | 1152 | void destroy_segment_manager(struct f2fs_sb_info *); |
1153 | int __init create_segment_manager_caches(void); | ||
1154 | void destroy_segment_manager_caches(void); | ||
1082 | 1155 | ||
1083 | /* | 1156 | /* |
1084 | * checkpoint.c | 1157 | * checkpoint.c |
@@ -1090,7 +1163,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *); | |||
1090 | void release_orphan_inode(struct f2fs_sb_info *); | 1163 | void release_orphan_inode(struct f2fs_sb_info *); |
1091 | void add_orphan_inode(struct f2fs_sb_info *, nid_t); | 1164 | void add_orphan_inode(struct f2fs_sb_info *, nid_t); |
1092 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | 1165 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); |
1093 | int recover_orphan_inodes(struct f2fs_sb_info *); | 1166 | void recover_orphan_inodes(struct f2fs_sb_info *); |
1094 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1167 | int get_valid_checkpoint(struct f2fs_sb_info *); |
1095 | void set_dirty_dir_page(struct inode *, struct page *); | 1168 | void set_dirty_dir_page(struct inode *, struct page *); |
1096 | void add_dirty_dir_inode(struct inode *); | 1169 | void add_dirty_dir_inode(struct inode *); |
@@ -1105,13 +1178,17 @@ void destroy_checkpoint_caches(void); | |||
1105 | /* | 1178 | /* |
1106 | * data.c | 1179 | * data.c |
1107 | */ | 1180 | */ |
1181 | void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); | ||
1182 | int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); | ||
1183 | void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, | ||
1184 | struct f2fs_io_info *); | ||
1108 | int reserve_new_block(struct dnode_of_data *); | 1185 | int reserve_new_block(struct dnode_of_data *); |
1186 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); | ||
1109 | void update_extent_cache(block_t, struct dnode_of_data *); | 1187 | void update_extent_cache(block_t, struct dnode_of_data *); |
1110 | struct page *find_data_page(struct inode *, pgoff_t, bool); | 1188 | struct page *find_data_page(struct inode *, pgoff_t, bool); |
1111 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1189 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
1112 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); | 1190 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
1113 | int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); | 1191 | int do_write_data_page(struct page *, struct f2fs_io_info *); |
1114 | int do_write_data_page(struct page *); | ||
1115 | 1192 | ||
1116 | /* | 1193 | /* |
1117 | * gc.c | 1194 | * gc.c |
@@ -1144,7 +1221,7 @@ struct f2fs_stat_info { | |||
1144 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; | 1221 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; |
1145 | int nats, sits, fnids; | 1222 | int nats, sits, fnids; |
1146 | int total_count, utilization; | 1223 | int total_count, utilization; |
1147 | int bg_gc; | 1224 | int bg_gc, inline_inode; |
1148 | unsigned int valid_count, valid_node_count, valid_inode_count; | 1225 | unsigned int valid_count, valid_node_count, valid_inode_count; |
1149 | unsigned int bimodal, avg_vblocks; | 1226 | unsigned int bimodal, avg_vblocks; |
1150 | int util_free, util_valid, util_invalid; | 1227 | int util_free, util_valid, util_invalid; |
@@ -1164,7 +1241,7 @@ struct f2fs_stat_info { | |||
1164 | 1241 | ||
1165 | static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | 1242 | static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) |
1166 | { | 1243 | { |
1167 | return (struct f2fs_stat_info*)sbi->stat_info; | 1244 | return (struct f2fs_stat_info *)sbi->stat_info; |
1168 | } | 1245 | } |
1169 | 1246 | ||
1170 | #define stat_inc_call_count(si) ((si)->call_count++) | 1247 | #define stat_inc_call_count(si) ((si)->call_count++) |
@@ -1173,6 +1250,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1173 | #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) | 1250 | #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) |
1174 | #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) | 1251 | #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) |
1175 | #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) | 1252 | #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) |
1253 | #define stat_inc_inline_inode(inode) \ | ||
1254 | do { \ | ||
1255 | if (f2fs_has_inline_data(inode)) \ | ||
1256 | ((F2FS_SB(inode->i_sb))->inline_inode++); \ | ||
1257 | } while (0) | ||
1258 | #define stat_dec_inline_inode(inode) \ | ||
1259 | do { \ | ||
1260 | if (f2fs_has_inline_data(inode)) \ | ||
1261 | ((F2FS_SB(inode->i_sb))->inline_inode--); \ | ||
1262 | } while (0) | ||
1263 | |||
1176 | #define stat_inc_seg_type(sbi, curseg) \ | 1264 | #define stat_inc_seg_type(sbi, curseg) \ |
1177 | ((sbi)->segment_count[(curseg)->alloc_type]++) | 1265 | ((sbi)->segment_count[(curseg)->alloc_type]++) |
1178 | #define stat_inc_block_count(sbi, curseg) \ | 1266 | #define stat_inc_block_count(sbi, curseg) \ |
@@ -1216,6 +1304,8 @@ void f2fs_destroy_root_stats(void); | |||
1216 | #define stat_dec_dirty_dir(sbi) | 1304 | #define stat_dec_dirty_dir(sbi) |
1217 | #define stat_inc_total_hit(sb) | 1305 | #define stat_inc_total_hit(sb) |
1218 | #define stat_inc_read_hit(sb) | 1306 | #define stat_inc_read_hit(sb) |
1307 | #define stat_inc_inline_inode(inode) | ||
1308 | #define stat_dec_inline_inode(inode) | ||
1219 | #define stat_inc_seg_type(sbi, curseg) | 1309 | #define stat_inc_seg_type(sbi, curseg) |
1220 | #define stat_inc_block_count(sbi, curseg) | 1310 | #define stat_inc_block_count(sbi, curseg) |
1221 | #define stat_inc_seg_count(si, type) | 1311 | #define stat_inc_seg_count(si, type) |
@@ -1238,4 +1328,13 @@ extern const struct address_space_operations f2fs_meta_aops; | |||
1238 | extern const struct inode_operations f2fs_dir_inode_operations; | 1328 | extern const struct inode_operations f2fs_dir_inode_operations; |
1239 | extern const struct inode_operations f2fs_symlink_inode_operations; | 1329 | extern const struct inode_operations f2fs_symlink_inode_operations; |
1240 | extern const struct inode_operations f2fs_special_inode_operations; | 1330 | extern const struct inode_operations f2fs_special_inode_operations; |
1331 | |||
1332 | /* | ||
1333 | * inline.c | ||
1334 | */ | ||
1335 | bool f2fs_may_inline(struct inode *); | ||
1336 | int f2fs_read_inline_data(struct inode *, struct page *); | ||
1337 | int f2fs_convert_inline_data(struct inode *, pgoff_t); | ||
1338 | int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); | ||
1339 | int recover_inline_data(struct inode *, struct page *); | ||
1241 | #endif | 1340 | #endif |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d714f4972d5..85e91ca88d57 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -33,7 +33,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
33 | struct page *page = vmf->page; | 33 | struct page *page = vmf->page; |
34 | struct inode *inode = file_inode(vma->vm_file); | 34 | struct inode *inode = file_inode(vma->vm_file); |
35 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 35 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
36 | block_t old_blk_addr; | ||
37 | struct dnode_of_data dn; | 36 | struct dnode_of_data dn; |
38 | int err; | 37 | int err; |
39 | 38 | ||
@@ -44,30 +43,16 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
44 | /* block allocation */ | 43 | /* block allocation */ |
45 | f2fs_lock_op(sbi); | 44 | f2fs_lock_op(sbi); |
46 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 45 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
47 | err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); | 46 | err = f2fs_reserve_block(&dn, page->index); |
48 | if (err) { | ||
49 | f2fs_unlock_op(sbi); | ||
50 | goto out; | ||
51 | } | ||
52 | |||
53 | old_blk_addr = dn.data_blkaddr; | ||
54 | |||
55 | if (old_blk_addr == NULL_ADDR) { | ||
56 | err = reserve_new_block(&dn); | ||
57 | if (err) { | ||
58 | f2fs_put_dnode(&dn); | ||
59 | f2fs_unlock_op(sbi); | ||
60 | goto out; | ||
61 | } | ||
62 | } | ||
63 | f2fs_put_dnode(&dn); | ||
64 | f2fs_unlock_op(sbi); | 47 | f2fs_unlock_op(sbi); |
48 | if (err) | ||
49 | goto out; | ||
65 | 50 | ||
66 | file_update_time(vma->vm_file); | 51 | file_update_time(vma->vm_file); |
67 | lock_page(page); | 52 | lock_page(page); |
68 | if (page->mapping != inode->i_mapping || | 53 | if (unlikely(page->mapping != inode->i_mapping || |
69 | page_offset(page) > i_size_read(inode) || | 54 | page_offset(page) > i_size_read(inode) || |
70 | !PageUptodate(page)) { | 55 | !PageUptodate(page))) { |
71 | unlock_page(page); | 56 | unlock_page(page); |
72 | err = -EFAULT; | 57 | err = -EFAULT; |
73 | goto out; | 58 | goto out; |
@@ -130,12 +115,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
130 | int ret = 0; | 115 | int ret = 0; |
131 | bool need_cp = false; | 116 | bool need_cp = false; |
132 | struct writeback_control wbc = { | 117 | struct writeback_control wbc = { |
133 | .sync_mode = WB_SYNC_ALL, | 118 | .sync_mode = WB_SYNC_NONE, |
134 | .nr_to_write = LONG_MAX, | 119 | .nr_to_write = LONG_MAX, |
135 | .for_reclaim = 0, | 120 | .for_reclaim = 0, |
136 | }; | 121 | }; |
137 | 122 | ||
138 | if (f2fs_readonly(inode->i_sb)) | 123 | if (unlikely(f2fs_readonly(inode->i_sb))) |
139 | return 0; | 124 | return 0; |
140 | 125 | ||
141 | trace_f2fs_sync_file_enter(inode); | 126 | trace_f2fs_sync_file_enter(inode); |
@@ -217,7 +202,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
217 | raw_node = F2FS_NODE(dn->node_page); | 202 | raw_node = F2FS_NODE(dn->node_page); |
218 | addr = blkaddr_in_node(raw_node) + ofs; | 203 | addr = blkaddr_in_node(raw_node) + ofs; |
219 | 204 | ||
220 | for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { | 205 | for (; count > 0; count--, addr++, dn->ofs_in_node++) { |
221 | block_t blkaddr = le32_to_cpu(*addr); | 206 | block_t blkaddr = le32_to_cpu(*addr); |
222 | if (blkaddr == NULL_ADDR) | 207 | if (blkaddr == NULL_ADDR) |
223 | continue; | 208 | continue; |
@@ -256,7 +241,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) | |||
256 | return; | 241 | return; |
257 | 242 | ||
258 | lock_page(page); | 243 | lock_page(page); |
259 | if (page->mapping != inode->i_mapping) { | 244 | if (unlikely(page->mapping != inode->i_mapping)) { |
260 | f2fs_put_page(page, 1); | 245 | f2fs_put_page(page, 1); |
261 | return; | 246 | return; |
262 | } | 247 | } |
@@ -266,21 +251,24 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) | |||
266 | f2fs_put_page(page, 1); | 251 | f2fs_put_page(page, 1); |
267 | } | 252 | } |
268 | 253 | ||
269 | static int truncate_blocks(struct inode *inode, u64 from) | 254 | int truncate_blocks(struct inode *inode, u64 from) |
270 | { | 255 | { |
271 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 256 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
272 | unsigned int blocksize = inode->i_sb->s_blocksize; | 257 | unsigned int blocksize = inode->i_sb->s_blocksize; |
273 | struct dnode_of_data dn; | 258 | struct dnode_of_data dn; |
274 | pgoff_t free_from; | 259 | pgoff_t free_from; |
275 | int count = 0; | 260 | int count = 0, err = 0; |
276 | int err; | ||
277 | 261 | ||
278 | trace_f2fs_truncate_blocks_enter(inode, from); | 262 | trace_f2fs_truncate_blocks_enter(inode, from); |
279 | 263 | ||
264 | if (f2fs_has_inline_data(inode)) | ||
265 | goto done; | ||
266 | |||
280 | free_from = (pgoff_t) | 267 | free_from = (pgoff_t) |
281 | ((from + blocksize - 1) >> (sbi->log_blocksize)); | 268 | ((from + blocksize - 1) >> (sbi->log_blocksize)); |
282 | 269 | ||
283 | f2fs_lock_op(sbi); | 270 | f2fs_lock_op(sbi); |
271 | |||
284 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 272 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
285 | err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); | 273 | err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); |
286 | if (err) { | 274 | if (err) { |
@@ -308,7 +296,7 @@ static int truncate_blocks(struct inode *inode, u64 from) | |||
308 | free_next: | 296 | free_next: |
309 | err = truncate_inode_blocks(inode, free_from); | 297 | err = truncate_inode_blocks(inode, free_from); |
310 | f2fs_unlock_op(sbi); | 298 | f2fs_unlock_op(sbi); |
311 | 299 | done: | |
312 | /* lastly zero out the first data page */ | 300 | /* lastly zero out the first data page */ |
313 | truncate_partial_data_page(inode, from); | 301 | truncate_partial_data_page(inode, from); |
314 | 302 | ||
@@ -382,6 +370,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) | |||
382 | 370 | ||
383 | if ((attr->ia_valid & ATTR_SIZE) && | 371 | if ((attr->ia_valid & ATTR_SIZE) && |
384 | attr->ia_size != i_size_read(inode)) { | 372 | attr->ia_size != i_size_read(inode)) { |
373 | err = f2fs_convert_inline_data(inode, attr->ia_size); | ||
374 | if (err) | ||
375 | return err; | ||
376 | |||
385 | truncate_setsize(inode, attr->ia_size); | 377 | truncate_setsize(inode, attr->ia_size); |
386 | f2fs_truncate(inode); | 378 | f2fs_truncate(inode); |
387 | f2fs_balance_fs(F2FS_SB(inode->i_sb)); | 379 | f2fs_balance_fs(F2FS_SB(inode->i_sb)); |
@@ -459,12 +451,16 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) | |||
459 | return 0; | 451 | return 0; |
460 | } | 452 | } |
461 | 453 | ||
462 | static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) | 454 | static int punch_hole(struct inode *inode, loff_t offset, loff_t len) |
463 | { | 455 | { |
464 | pgoff_t pg_start, pg_end; | 456 | pgoff_t pg_start, pg_end; |
465 | loff_t off_start, off_end; | 457 | loff_t off_start, off_end; |
466 | int ret = 0; | 458 | int ret = 0; |
467 | 459 | ||
460 | ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); | ||
461 | if (ret) | ||
462 | return ret; | ||
463 | |||
468 | pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; | 464 | pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; |
469 | pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; | 465 | pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; |
470 | 466 | ||
@@ -499,12 +495,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) | |||
499 | } | 495 | } |
500 | } | 496 | } |
501 | 497 | ||
502 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
503 | i_size_read(inode) <= (offset + len)) { | ||
504 | i_size_write(inode, offset); | ||
505 | mark_inode_dirty(inode); | ||
506 | } | ||
507 | |||
508 | return ret; | 498 | return ret; |
509 | } | 499 | } |
510 | 500 | ||
@@ -521,6 +511,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset, | |||
521 | if (ret) | 511 | if (ret) |
522 | return ret; | 512 | return ret; |
523 | 513 | ||
514 | ret = f2fs_convert_inline_data(inode, offset + len); | ||
515 | if (ret) | ||
516 | return ret; | ||
517 | |||
524 | pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; | 518 | pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; |
525 | pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; | 519 | pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; |
526 | 520 | ||
@@ -532,22 +526,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset, | |||
532 | 526 | ||
533 | f2fs_lock_op(sbi); | 527 | f2fs_lock_op(sbi); |
534 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 528 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
535 | ret = get_dnode_of_data(&dn, index, ALLOC_NODE); | 529 | ret = f2fs_reserve_block(&dn, index); |
536 | if (ret) { | ||
537 | f2fs_unlock_op(sbi); | ||
538 | break; | ||
539 | } | ||
540 | |||
541 | if (dn.data_blkaddr == NULL_ADDR) { | ||
542 | ret = reserve_new_block(&dn); | ||
543 | if (ret) { | ||
544 | f2fs_put_dnode(&dn); | ||
545 | f2fs_unlock_op(sbi); | ||
546 | break; | ||
547 | } | ||
548 | } | ||
549 | f2fs_put_dnode(&dn); | ||
550 | f2fs_unlock_op(sbi); | 530 | f2fs_unlock_op(sbi); |
531 | if (ret) | ||
532 | break; | ||
551 | 533 | ||
552 | if (pg_start == pg_end) | 534 | if (pg_start == pg_end) |
553 | new_size = offset + len; | 535 | new_size = offset + len; |
@@ -578,7 +560,7 @@ static long f2fs_fallocate(struct file *file, int mode, | |||
578 | return -EOPNOTSUPP; | 560 | return -EOPNOTSUPP; |
579 | 561 | ||
580 | if (mode & FALLOC_FL_PUNCH_HOLE) | 562 | if (mode & FALLOC_FL_PUNCH_HOLE) |
581 | ret = punch_hole(inode, offset, len, mode); | 563 | ret = punch_hole(inode, offset, len); |
582 | else | 564 | else |
583 | ret = expand_inode_data(inode, offset, len, mode); | 565 | ret = expand_inode_data(inode, offset, len, mode); |
584 | 566 | ||
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b7ad1ec7e4cc..ea0371e854b4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -119,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) | |||
119 | kfree(gc_th); | 119 | kfree(gc_th); |
120 | sbi->gc_thread = NULL; | 120 | sbi->gc_thread = NULL; |
121 | } | 121 | } |
122 | |||
123 | out: | 122 | out: |
124 | return err; | 123 | return err; |
125 | } | 124 | } |
@@ -164,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, | |||
164 | p->ofs_unit = sbi->segs_per_sec; | 163 | p->ofs_unit = sbi->segs_per_sec; |
165 | } | 164 | } |
166 | 165 | ||
167 | if (p->max_search > MAX_VICTIM_SEARCH) | 166 | if (p->max_search > sbi->max_victim_search) |
168 | p->max_search = MAX_VICTIM_SEARCH; | 167 | p->max_search = sbi->max_victim_search; |
169 | 168 | ||
170 | p->offset = sbi->last_victim[p->gc_mode]; | 169 | p->offset = sbi->last_victim[p->gc_mode]; |
171 | } | 170 | } |
@@ -429,7 +428,7 @@ next_step: | |||
429 | 428 | ||
430 | /* set page dirty and write it */ | 429 | /* set page dirty and write it */ |
431 | if (gc_type == FG_GC) { | 430 | if (gc_type == FG_GC) { |
432 | f2fs_wait_on_page_writeback(node_page, NODE, true); | 431 | f2fs_wait_on_page_writeback(node_page, NODE); |
433 | set_page_dirty(node_page); | 432 | set_page_dirty(node_page); |
434 | } else { | 433 | } else { |
435 | if (!PageWriteback(node_page)) | 434 | if (!PageWriteback(node_page)) |
@@ -521,6 +520,11 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | |||
521 | 520 | ||
522 | static void move_data_page(struct inode *inode, struct page *page, int gc_type) | 521 | static void move_data_page(struct inode *inode, struct page *page, int gc_type) |
523 | { | 522 | { |
523 | struct f2fs_io_info fio = { | ||
524 | .type = DATA, | ||
525 | .rw = WRITE_SYNC, | ||
526 | }; | ||
527 | |||
524 | if (gc_type == BG_GC) { | 528 | if (gc_type == BG_GC) { |
525 | if (PageWriteback(page)) | 529 | if (PageWriteback(page)) |
526 | goto out; | 530 | goto out; |
@@ -529,7 +533,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |||
529 | } else { | 533 | } else { |
530 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 534 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
531 | 535 | ||
532 | f2fs_wait_on_page_writeback(page, DATA, true); | 536 | f2fs_wait_on_page_writeback(page, DATA); |
533 | 537 | ||
534 | if (clear_page_dirty_for_io(page) && | 538 | if (clear_page_dirty_for_io(page) && |
535 | S_ISDIR(inode->i_mode)) { | 539 | S_ISDIR(inode->i_mode)) { |
@@ -537,7 +541,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |||
537 | inode_dec_dirty_dents(inode); | 541 | inode_dec_dirty_dents(inode); |
538 | } | 542 | } |
539 | set_cold_data(page); | 543 | set_cold_data(page); |
540 | do_write_data_page(page); | 544 | do_write_data_page(page, &fio); |
541 | clear_cold_data(page); | 545 | clear_cold_data(page); |
542 | } | 546 | } |
543 | out: | 547 | out: |
@@ -631,7 +635,7 @@ next_iput: | |||
631 | goto next_step; | 635 | goto next_step; |
632 | 636 | ||
633 | if (gc_type == FG_GC) { | 637 | if (gc_type == FG_GC) { |
634 | f2fs_submit_bio(sbi, DATA, true); | 638 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
635 | 639 | ||
636 | /* | 640 | /* |
637 | * In the case of FG_GC, it'd be better to reclaim this victim | 641 | * In the case of FG_GC, it'd be better to reclaim this victim |
@@ -664,8 +668,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | |||
664 | 668 | ||
665 | /* read segment summary of victim */ | 669 | /* read segment summary of victim */ |
666 | sum_page = get_sum_page(sbi, segno); | 670 | sum_page = get_sum_page(sbi, segno); |
667 | if (IS_ERR(sum_page)) | ||
668 | return; | ||
669 | 671 | ||
670 | blk_start_plug(&plug); | 672 | blk_start_plug(&plug); |
671 | 673 | ||
@@ -697,7 +699,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) | |||
697 | 699 | ||
698 | INIT_LIST_HEAD(&ilist); | 700 | INIT_LIST_HEAD(&ilist); |
699 | gc_more: | 701 | gc_more: |
700 | if (!(sbi->sb->s_flags & MS_ACTIVE)) | 702 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) |
701 | goto stop; | 703 | goto stop; |
702 | 704 | ||
703 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | 705 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { |
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 507056d22205..5d5eb6047bf4 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h | |||
@@ -20,7 +20,7 @@ | |||
20 | #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ | 20 | #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ |
21 | 21 | ||
22 | /* Search max. number of dirty segments to select a victim segment */ | 22 | /* Search max. number of dirty segments to select a victim segment */ |
23 | #define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ | 23 | #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ |
24 | 24 | ||
25 | struct f2fs_gc_kthread { | 25 | struct f2fs_gc_kthread { |
26 | struct task_struct *f2fs_gc_task; | 26 | struct task_struct *f2fs_gc_task; |
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c new file mode 100644 index 000000000000..31ee5b164ff9 --- /dev/null +++ b/fs/f2fs/inline.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /* | ||
2 | * fs/f2fs/inline.c | ||
3 | * Copyright (c) 2013, Intel Corporation | ||
4 | * Authors: Huajun Li <huajun.li@intel.com> | ||
5 | * Haicheng Li <haicheng.li@intel.com> | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/f2fs_fs.h> | ||
13 | |||
14 | #include "f2fs.h" | ||
15 | |||
16 | bool f2fs_may_inline(struct inode *inode) | ||
17 | { | ||
18 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
19 | block_t nr_blocks; | ||
20 | loff_t i_size; | ||
21 | |||
22 | if (!test_opt(sbi, INLINE_DATA)) | ||
23 | return false; | ||
24 | |||
25 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; | ||
26 | if (inode->i_blocks > nr_blocks) | ||
27 | return false; | ||
28 | |||
29 | i_size = i_size_read(inode); | ||
30 | if (i_size > MAX_INLINE_DATA) | ||
31 | return false; | ||
32 | |||
33 | return true; | ||
34 | } | ||
35 | |||
36 | int f2fs_read_inline_data(struct inode *inode, struct page *page) | ||
37 | { | ||
38 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
39 | struct page *ipage; | ||
40 | void *src_addr, *dst_addr; | ||
41 | |||
42 | if (page->index) { | ||
43 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | ||
44 | goto out; | ||
45 | } | ||
46 | |||
47 | ipage = get_node_page(sbi, inode->i_ino); | ||
48 | if (IS_ERR(ipage)) | ||
49 | return PTR_ERR(ipage); | ||
50 | |||
51 | zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); | ||
52 | |||
53 | /* Copy the whole inline data block */ | ||
54 | src_addr = inline_data_addr(ipage); | ||
55 | dst_addr = kmap(page); | ||
56 | memcpy(dst_addr, src_addr, MAX_INLINE_DATA); | ||
57 | kunmap(page); | ||
58 | f2fs_put_page(ipage, 1); | ||
59 | |||
60 | out: | ||
61 | SetPageUptodate(page); | ||
62 | unlock_page(page); | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) | ||
68 | { | ||
69 | int err; | ||
70 | struct page *ipage; | ||
71 | struct dnode_of_data dn; | ||
72 | void *src_addr, *dst_addr; | ||
73 | block_t new_blk_addr; | ||
74 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
75 | struct f2fs_io_info fio = { | ||
76 | .type = DATA, | ||
77 | .rw = WRITE_SYNC | REQ_PRIO, | ||
78 | }; | ||
79 | |||
80 | f2fs_lock_op(sbi); | ||
81 | ipage = get_node_page(sbi, inode->i_ino); | ||
82 | if (IS_ERR(ipage)) | ||
83 | return PTR_ERR(ipage); | ||
84 | |||
85 | /* | ||
86 | * i_addr[0] is not used for inline data, | ||
87 | * so reserving new block will not destroy inline data | ||
88 | */ | ||
89 | set_new_dnode(&dn, inode, ipage, NULL, 0); | ||
90 | err = f2fs_reserve_block(&dn, 0); | ||
91 | if (err) { | ||
92 | f2fs_unlock_op(sbi); | ||
93 | return err; | ||
94 | } | ||
95 | |||
96 | zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); | ||
97 | |||
98 | /* Copy the whole inline data block */ | ||
99 | src_addr = inline_data_addr(ipage); | ||
100 | dst_addr = kmap(page); | ||
101 | memcpy(dst_addr, src_addr, MAX_INLINE_DATA); | ||
102 | kunmap(page); | ||
103 | SetPageUptodate(page); | ||
104 | |||
105 | /* write data page to try to make data consistent */ | ||
106 | set_page_writeback(page); | ||
107 | write_data_page(page, &dn, &new_blk_addr, &fio); | ||
108 | update_extent_cache(new_blk_addr, &dn); | ||
109 | f2fs_wait_on_page_writeback(page, DATA); | ||
110 | |||
111 | /* clear inline data and flag after data writeback */ | ||
112 | zero_user_segment(ipage, INLINE_DATA_OFFSET, | ||
113 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); | ||
114 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | ||
115 | stat_dec_inline_inode(inode); | ||
116 | |||
117 | sync_inode_page(&dn); | ||
118 | f2fs_put_dnode(&dn); | ||
119 | f2fs_unlock_op(sbi); | ||
120 | return err; | ||
121 | } | ||
122 | |||
123 | int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) | ||
124 | { | ||
125 | struct page *page; | ||
126 | int err; | ||
127 | |||
128 | if (!f2fs_has_inline_data(inode)) | ||
129 | return 0; | ||
130 | else if (to_size <= MAX_INLINE_DATA) | ||
131 | return 0; | ||
132 | |||
133 | page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); | ||
134 | if (!page) | ||
135 | return -ENOMEM; | ||
136 | |||
137 | err = __f2fs_convert_inline_data(inode, page); | ||
138 | f2fs_put_page(page, 1); | ||
139 | return err; | ||
140 | } | ||
141 | |||
142 | int f2fs_write_inline_data(struct inode *inode, | ||
143 | struct page *page, unsigned size) | ||
144 | { | ||
145 | void *src_addr, *dst_addr; | ||
146 | struct page *ipage; | ||
147 | struct dnode_of_data dn; | ||
148 | int err; | ||
149 | |||
150 | set_new_dnode(&dn, inode, NULL, NULL, 0); | ||
151 | err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); | ||
152 | if (err) | ||
153 | return err; | ||
154 | ipage = dn.inode_page; | ||
155 | |||
156 | zero_user_segment(ipage, INLINE_DATA_OFFSET, | ||
157 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); | ||
158 | src_addr = kmap(page); | ||
159 | dst_addr = inline_data_addr(ipage); | ||
160 | memcpy(dst_addr, src_addr, size); | ||
161 | kunmap(page); | ||
162 | |||
163 | /* Release the first data block if it is allocated */ | ||
164 | if (!f2fs_has_inline_data(inode)) { | ||
165 | truncate_data_blocks_range(&dn, 1); | ||
166 | set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | ||
167 | stat_inc_inline_inode(inode); | ||
168 | } | ||
169 | |||
170 | sync_inode_page(&dn); | ||
171 | f2fs_put_dnode(&dn); | ||
172 | |||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | int recover_inline_data(struct inode *inode, struct page *npage) | ||
177 | { | ||
178 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
179 | struct f2fs_inode *ri = NULL; | ||
180 | void *src_addr, *dst_addr; | ||
181 | struct page *ipage; | ||
182 | |||
183 | /* | ||
184 | * The inline_data recovery policy is as follows. | ||
185 | * [prev.] [next] of inline_data flag | ||
186 | * o o -> recover inline_data | ||
187 | * o x -> remove inline_data, and then recover data blocks | ||
188 | * x o -> remove inline_data, and then recover inline_data | ||
189 | * x x -> recover data blocks | ||
190 | */ | ||
191 | if (IS_INODE(npage)) | ||
192 | ri = F2FS_INODE(npage); | ||
193 | |||
194 | if (f2fs_has_inline_data(inode) && | ||
195 | ri && ri->i_inline & F2FS_INLINE_DATA) { | ||
196 | process_inline: | ||
197 | ipage = get_node_page(sbi, inode->i_ino); | ||
198 | f2fs_bug_on(IS_ERR(ipage)); | ||
199 | |||
200 | src_addr = inline_data_addr(npage); | ||
201 | dst_addr = inline_data_addr(ipage); | ||
202 | memcpy(dst_addr, src_addr, MAX_INLINE_DATA); | ||
203 | update_inode(inode, ipage); | ||
204 | f2fs_put_page(ipage, 1); | ||
205 | return -1; | ||
206 | } | ||
207 | |||
208 | if (f2fs_has_inline_data(inode)) { | ||
209 | ipage = get_node_page(sbi, inode->i_ino); | ||
210 | f2fs_bug_on(IS_ERR(ipage)); | ||
211 | zero_user_segment(ipage, INLINE_DATA_OFFSET, | ||
212 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); | ||
213 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | ||
214 | update_inode(inode, ipage); | ||
215 | f2fs_put_page(ipage, 1); | ||
216 | } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { | ||
217 | truncate_blocks(inode, 0); | ||
218 | set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | ||
219 | goto process_inline; | ||
220 | } | ||
221 | return 0; | ||
222 | } | ||
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d0eaa9faeca0..4d67ed736dca 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -42,9 +42,11 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
42 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || | 42 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || |
43 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | 43 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { |
44 | if (ri->i_addr[0]) | 44 | if (ri->i_addr[0]) |
45 | inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); | 45 | inode->i_rdev = |
46 | old_decode_dev(le32_to_cpu(ri->i_addr[0])); | ||
46 | else | 47 | else |
47 | inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); | 48 | inode->i_rdev = |
49 | new_decode_dev(le32_to_cpu(ri->i_addr[1])); | ||
48 | } | 50 | } |
49 | } | 51 | } |
50 | 52 | ||
@@ -52,11 +54,13 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
52 | { | 54 | { |
53 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 55 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
54 | if (old_valid_dev(inode->i_rdev)) { | 56 | if (old_valid_dev(inode->i_rdev)) { |
55 | ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); | 57 | ri->i_addr[0] = |
58 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | ||
56 | ri->i_addr[1] = 0; | 59 | ri->i_addr[1] = 0; |
57 | } else { | 60 | } else { |
58 | ri->i_addr[0] = 0; | 61 | ri->i_addr[0] = 0; |
59 | ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); | 62 | ri->i_addr[1] = |
63 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | ||
60 | ri->i_addr[2] = 0; | 64 | ri->i_addr[2] = 0; |
61 | } | 65 | } |
62 | } | 66 | } |
@@ -67,7 +71,6 @@ static int do_read_inode(struct inode *inode) | |||
67 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 71 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
68 | struct f2fs_inode_info *fi = F2FS_I(inode); | 72 | struct f2fs_inode_info *fi = F2FS_I(inode); |
69 | struct page *node_page; | 73 | struct page *node_page; |
70 | struct f2fs_node *rn; | ||
71 | struct f2fs_inode *ri; | 74 | struct f2fs_inode *ri; |
72 | 75 | ||
73 | /* Check if ino is within scope */ | 76 | /* Check if ino is within scope */ |
@@ -81,8 +84,7 @@ static int do_read_inode(struct inode *inode) | |||
81 | if (IS_ERR(node_page)) | 84 | if (IS_ERR(node_page)) |
82 | return PTR_ERR(node_page); | 85 | return PTR_ERR(node_page); |
83 | 86 | ||
84 | rn = F2FS_NODE(node_page); | 87 | ri = F2FS_INODE(node_page); |
85 | ri = &(rn->i); | ||
86 | 88 | ||
87 | inode->i_mode = le16_to_cpu(ri->i_mode); | 89 | inode->i_mode = le16_to_cpu(ri->i_mode); |
88 | i_uid_write(inode, le32_to_cpu(ri->i_uid)); | 90 | i_uid_write(inode, le32_to_cpu(ri->i_uid)); |
@@ -175,13 +177,11 @@ bad_inode: | |||
175 | 177 | ||
176 | void update_inode(struct inode *inode, struct page *node_page) | 178 | void update_inode(struct inode *inode, struct page *node_page) |
177 | { | 179 | { |
178 | struct f2fs_node *rn; | ||
179 | struct f2fs_inode *ri; | 180 | struct f2fs_inode *ri; |
180 | 181 | ||
181 | f2fs_wait_on_page_writeback(node_page, NODE, false); | 182 | f2fs_wait_on_page_writeback(node_page, NODE); |
182 | 183 | ||
183 | rn = F2FS_NODE(node_page); | 184 | ri = F2FS_INODE(node_page); |
184 | ri = &(rn->i); | ||
185 | 185 | ||
186 | ri->i_mode = cpu_to_le16(inode->i_mode); | 186 | ri->i_mode = cpu_to_le16(inode->i_mode); |
187 | ri->i_advise = F2FS_I(inode)->i_advise; | 187 | ri->i_advise = F2FS_I(inode)->i_advise; |
@@ -281,6 +281,7 @@ void f2fs_evict_inode(struct inode *inode) | |||
281 | 281 | ||
282 | f2fs_lock_op(sbi); | 282 | f2fs_lock_op(sbi); |
283 | remove_inode_page(inode); | 283 | remove_inode_page(inode); |
284 | stat_dec_inline_inode(inode); | ||
284 | f2fs_unlock_op(sbi); | 285 | f2fs_unlock_op(sbi); |
285 | 286 | ||
286 | sb_end_intwrite(inode->i_sb); | 287 | sb_end_intwrite(inode->i_sb); |
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 575adac17f8b..3d32f2969c5e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -424,11 +424,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
424 | } | 424 | } |
425 | 425 | ||
426 | f2fs_set_link(new_dir, new_entry, new_page, old_inode); | 426 | f2fs_set_link(new_dir, new_entry, new_page, old_inode); |
427 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; | ||
427 | 428 | ||
428 | new_inode->i_ctime = CURRENT_TIME; | 429 | new_inode->i_ctime = CURRENT_TIME; |
429 | if (old_dir_entry) | 430 | if (old_dir_entry) |
430 | drop_nlink(new_inode); | 431 | drop_nlink(new_inode); |
431 | drop_nlink(new_inode); | 432 | drop_nlink(new_inode); |
433 | mark_inode_dirty(new_inode); | ||
432 | 434 | ||
433 | if (!new_inode->i_nlink) | 435 | if (!new_inode->i_nlink) |
434 | add_orphan_inode(sbi, new_inode->i_ino); | 436 | add_orphan_inode(sbi, new_inode->i_ino); |
@@ -457,11 +459,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
457 | if (old_dir != new_dir) { | 459 | if (old_dir != new_dir) { |
458 | f2fs_set_link(old_inode, old_dir_entry, | 460 | f2fs_set_link(old_inode, old_dir_entry, |
459 | old_dir_page, new_dir); | 461 | old_dir_page, new_dir); |
462 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; | ||
463 | update_inode_page(old_inode); | ||
460 | } else { | 464 | } else { |
461 | kunmap(old_dir_page); | 465 | kunmap(old_dir_page); |
462 | f2fs_put_page(old_dir_page, 0); | 466 | f2fs_put_page(old_dir_page, 0); |
463 | } | 467 | } |
464 | drop_nlink(old_dir); | 468 | drop_nlink(old_dir); |
469 | mark_inode_dirty(old_dir); | ||
465 | update_inode_page(old_dir); | 470 | update_inode_page(old_dir); |
466 | } | 471 | } |
467 | 472 | ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4ac4150d421d..b0649b76eb4f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -87,17 +87,19 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
87 | */ | 87 | */ |
88 | static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) | 88 | static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) |
89 | { | 89 | { |
90 | struct address_space *mapping = sbi->meta_inode->i_mapping; | 90 | struct address_space *mapping = META_MAPPING(sbi); |
91 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 91 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
92 | struct blk_plug plug; | ||
93 | struct page *page; | 92 | struct page *page; |
94 | pgoff_t index; | 93 | pgoff_t index; |
95 | int i; | 94 | int i; |
95 | struct f2fs_io_info fio = { | ||
96 | .type = META, | ||
97 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
98 | }; | ||
96 | 99 | ||
97 | blk_start_plug(&plug); | ||
98 | 100 | ||
99 | for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { | 101 | for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { |
100 | if (nid >= nm_i->max_nid) | 102 | if (unlikely(nid >= nm_i->max_nid)) |
101 | nid = 0; | 103 | nid = 0; |
102 | index = current_nat_addr(sbi, nid); | 104 | index = current_nat_addr(sbi, nid); |
103 | 105 | ||
@@ -105,15 +107,15 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) | |||
105 | if (!page) | 107 | if (!page) |
106 | continue; | 108 | continue; |
107 | if (PageUptodate(page)) { | 109 | if (PageUptodate(page)) { |
110 | mark_page_accessed(page); | ||
108 | f2fs_put_page(page, 1); | 111 | f2fs_put_page(page, 1); |
109 | continue; | 112 | continue; |
110 | } | 113 | } |
111 | if (f2fs_readpage(sbi, page, index, READ)) | 114 | f2fs_submit_page_mbio(sbi, page, index, &fio); |
112 | continue; | 115 | mark_page_accessed(page); |
113 | |||
114 | f2fs_put_page(page, 0); | 116 | f2fs_put_page(page, 0); |
115 | } | 117 | } |
116 | blk_finish_plug(&plug); | 118 | f2fs_submit_merged_bio(sbi, META, READ); |
117 | } | 119 | } |
118 | 120 | ||
119 | static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) | 121 | static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) |
@@ -391,8 +393,8 @@ got: | |||
391 | 393 | ||
392 | /* | 394 | /* |
393 | * Caller should call f2fs_put_dnode(dn). | 395 | * Caller should call f2fs_put_dnode(dn). |
394 | * Also, it should grab and release a mutex by calling mutex_lock_op() and | 396 | * Also, it should grab and release a rwsem by calling f2fs_lock_op() and |
395 | * mutex_unlock_op() only if ro is not set RDONLY_NODE. | 397 | * f2fs_unlock_op() only if ro is not set RDONLY_NODE. |
396 | * In the case of RDONLY_NODE, we don't need to care about mutex. | 398 | * In the case of RDONLY_NODE, we don't need to care about mutex. |
397 | */ | 399 | */ |
398 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | 400 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) |
@@ -502,7 +504,7 @@ static void truncate_node(struct dnode_of_data *dn) | |||
502 | 504 | ||
503 | /* Deallocate node address */ | 505 | /* Deallocate node address */ |
504 | invalidate_blocks(sbi, ni.blk_addr); | 506 | invalidate_blocks(sbi, ni.blk_addr); |
505 | dec_valid_node_count(sbi, dn->inode, 1); | 507 | dec_valid_node_count(sbi, dn->inode); |
506 | set_node_addr(sbi, &ni, NULL_ADDR); | 508 | set_node_addr(sbi, &ni, NULL_ADDR); |
507 | 509 | ||
508 | if (dn->nid == dn->inode->i_ino) { | 510 | if (dn->nid == dn->inode->i_ino) { |
@@ -516,6 +518,10 @@ invalidate: | |||
516 | F2FS_SET_SB_DIRT(sbi); | 518 | F2FS_SET_SB_DIRT(sbi); |
517 | 519 | ||
518 | f2fs_put_page(dn->node_page, 1); | 520 | f2fs_put_page(dn->node_page, 1); |
521 | |||
522 | invalidate_mapping_pages(NODE_MAPPING(sbi), | ||
523 | dn->node_page->index, dn->node_page->index); | ||
524 | |||
519 | dn->node_page = NULL; | 525 | dn->node_page = NULL; |
520 | trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); | 526 | trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); |
521 | } | 527 | } |
@@ -631,19 +637,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, | |||
631 | return 0; | 637 | return 0; |
632 | 638 | ||
633 | /* get indirect nodes in the path */ | 639 | /* get indirect nodes in the path */ |
634 | for (i = 0; i < depth - 1; i++) { | 640 | for (i = 0; i < idx + 1; i++) { |
635 | /* refernece count'll be increased */ | 641 | /* refernece count'll be increased */ |
636 | pages[i] = get_node_page(sbi, nid[i]); | 642 | pages[i] = get_node_page(sbi, nid[i]); |
637 | if (IS_ERR(pages[i])) { | 643 | if (IS_ERR(pages[i])) { |
638 | depth = i + 1; | ||
639 | err = PTR_ERR(pages[i]); | 644 | err = PTR_ERR(pages[i]); |
645 | idx = i - 1; | ||
640 | goto fail; | 646 | goto fail; |
641 | } | 647 | } |
642 | nid[i + 1] = get_nid(pages[i], offset[i + 1], false); | 648 | nid[i + 1] = get_nid(pages[i], offset[i + 1], false); |
643 | } | 649 | } |
644 | 650 | ||
645 | /* free direct nodes linked to a partial indirect node */ | 651 | /* free direct nodes linked to a partial indirect node */ |
646 | for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { | 652 | for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { |
647 | child_nid = get_nid(pages[idx], i, false); | 653 | child_nid = get_nid(pages[idx], i, false); |
648 | if (!child_nid) | 654 | if (!child_nid) |
649 | continue; | 655 | continue; |
@@ -654,7 +660,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, | |||
654 | set_nid(pages[idx], i, 0, false); | 660 | set_nid(pages[idx], i, 0, false); |
655 | } | 661 | } |
656 | 662 | ||
657 | if (offset[depth - 1] == 0) { | 663 | if (offset[idx + 1] == 0) { |
658 | dn->node_page = pages[idx]; | 664 | dn->node_page = pages[idx]; |
659 | dn->nid = nid[idx]; | 665 | dn->nid = nid[idx]; |
660 | truncate_node(dn); | 666 | truncate_node(dn); |
@@ -662,9 +668,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, | |||
662 | f2fs_put_page(pages[idx], 1); | 668 | f2fs_put_page(pages[idx], 1); |
663 | } | 669 | } |
664 | offset[idx]++; | 670 | offset[idx]++; |
665 | offset[depth - 1] = 0; | 671 | offset[idx + 1] = 0; |
672 | idx--; | ||
666 | fail: | 673 | fail: |
667 | for (i = depth - 3; i >= 0; i--) | 674 | for (i = idx; i >= 0; i--) |
668 | f2fs_put_page(pages[i], 1); | 675 | f2fs_put_page(pages[i], 1); |
669 | 676 | ||
670 | trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); | 677 | trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); |
@@ -678,11 +685,10 @@ fail: | |||
678 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) | 685 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) |
679 | { | 686 | { |
680 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 687 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
681 | struct address_space *node_mapping = sbi->node_inode->i_mapping; | ||
682 | int err = 0, cont = 1; | 688 | int err = 0, cont = 1; |
683 | int level, offset[4], noffset[4]; | 689 | int level, offset[4], noffset[4]; |
684 | unsigned int nofs = 0; | 690 | unsigned int nofs = 0; |
685 | struct f2fs_node *rn; | 691 | struct f2fs_inode *ri; |
686 | struct dnode_of_data dn; | 692 | struct dnode_of_data dn; |
687 | struct page *page; | 693 | struct page *page; |
688 | 694 | ||
@@ -699,7 +705,7 @@ restart: | |||
699 | set_new_dnode(&dn, inode, page, NULL, 0); | 705 | set_new_dnode(&dn, inode, page, NULL, 0); |
700 | unlock_page(page); | 706 | unlock_page(page); |
701 | 707 | ||
702 | rn = F2FS_NODE(page); | 708 | ri = F2FS_INODE(page); |
703 | switch (level) { | 709 | switch (level) { |
704 | case 0: | 710 | case 0: |
705 | case 1: | 711 | case 1: |
@@ -709,7 +715,7 @@ restart: | |||
709 | nofs = noffset[1]; | 715 | nofs = noffset[1]; |
710 | if (!offset[level - 1]) | 716 | if (!offset[level - 1]) |
711 | goto skip_partial; | 717 | goto skip_partial; |
712 | err = truncate_partial_nodes(&dn, &rn->i, offset, level); | 718 | err = truncate_partial_nodes(&dn, ri, offset, level); |
713 | if (err < 0 && err != -ENOENT) | 719 | if (err < 0 && err != -ENOENT) |
714 | goto fail; | 720 | goto fail; |
715 | nofs += 1 + NIDS_PER_BLOCK; | 721 | nofs += 1 + NIDS_PER_BLOCK; |
@@ -718,7 +724,7 @@ restart: | |||
718 | nofs = 5 + 2 * NIDS_PER_BLOCK; | 724 | nofs = 5 + 2 * NIDS_PER_BLOCK; |
719 | if (!offset[level - 1]) | 725 | if (!offset[level - 1]) |
720 | goto skip_partial; | 726 | goto skip_partial; |
721 | err = truncate_partial_nodes(&dn, &rn->i, offset, level); | 727 | err = truncate_partial_nodes(&dn, ri, offset, level); |
722 | if (err < 0 && err != -ENOENT) | 728 | if (err < 0 && err != -ENOENT) |
723 | goto fail; | 729 | goto fail; |
724 | break; | 730 | break; |
@@ -728,7 +734,7 @@ restart: | |||
728 | 734 | ||
729 | skip_partial: | 735 | skip_partial: |
730 | while (cont) { | 736 | while (cont) { |
731 | dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); | 737 | dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); |
732 | switch (offset[0]) { | 738 | switch (offset[0]) { |
733 | case NODE_DIR1_BLOCK: | 739 | case NODE_DIR1_BLOCK: |
734 | case NODE_DIR2_BLOCK: | 740 | case NODE_DIR2_BLOCK: |
@@ -751,14 +757,14 @@ skip_partial: | |||
751 | if (err < 0 && err != -ENOENT) | 757 | if (err < 0 && err != -ENOENT) |
752 | goto fail; | 758 | goto fail; |
753 | if (offset[1] == 0 && | 759 | if (offset[1] == 0 && |
754 | rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { | 760 | ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { |
755 | lock_page(page); | 761 | lock_page(page); |
756 | if (page->mapping != node_mapping) { | 762 | if (unlikely(page->mapping != NODE_MAPPING(sbi))) { |
757 | f2fs_put_page(page, 1); | 763 | f2fs_put_page(page, 1); |
758 | goto restart; | 764 | goto restart; |
759 | } | 765 | } |
760 | wait_on_page_writeback(page); | 766 | wait_on_page_writeback(page); |
761 | rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; | 767 | ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; |
762 | set_page_dirty(page); | 768 | set_page_dirty(page); |
763 | unlock_page(page); | 769 | unlock_page(page); |
764 | } | 770 | } |
@@ -794,38 +800,34 @@ int truncate_xattr_node(struct inode *inode, struct page *page) | |||
794 | set_new_dnode(&dn, inode, page, npage, nid); | 800 | set_new_dnode(&dn, inode, page, npage, nid); |
795 | 801 | ||
796 | if (page) | 802 | if (page) |
797 | dn.inode_page_locked = 1; | 803 | dn.inode_page_locked = true; |
798 | truncate_node(&dn); | 804 | truncate_node(&dn); |
799 | return 0; | 805 | return 0; |
800 | } | 806 | } |
801 | 807 | ||
802 | /* | 808 | /* |
803 | * Caller should grab and release a mutex by calling mutex_lock_op() and | 809 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and |
804 | * mutex_unlock_op(). | 810 | * f2fs_unlock_op(). |
805 | */ | 811 | */ |
806 | int remove_inode_page(struct inode *inode) | 812 | void remove_inode_page(struct inode *inode) |
807 | { | 813 | { |
808 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 814 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
809 | struct page *page; | 815 | struct page *page; |
810 | nid_t ino = inode->i_ino; | 816 | nid_t ino = inode->i_ino; |
811 | struct dnode_of_data dn; | 817 | struct dnode_of_data dn; |
812 | int err; | ||
813 | 818 | ||
814 | page = get_node_page(sbi, ino); | 819 | page = get_node_page(sbi, ino); |
815 | if (IS_ERR(page)) | 820 | if (IS_ERR(page)) |
816 | return PTR_ERR(page); | 821 | return; |
817 | 822 | ||
818 | err = truncate_xattr_node(inode, page); | 823 | if (truncate_xattr_node(inode, page)) { |
819 | if (err) { | ||
820 | f2fs_put_page(page, 1); | 824 | f2fs_put_page(page, 1); |
821 | return err; | 825 | return; |
822 | } | 826 | } |
823 | |||
824 | /* 0 is possible, after f2fs_new_inode() is failed */ | 827 | /* 0 is possible, after f2fs_new_inode() is failed */ |
825 | f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); | 828 | f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); |
826 | set_new_dnode(&dn, inode, page, page, ino); | 829 | set_new_dnode(&dn, inode, page, page, ino); |
827 | truncate_node(&dn); | 830 | truncate_node(&dn); |
828 | return 0; | ||
829 | } | 831 | } |
830 | 832 | ||
831 | struct page *new_inode_page(struct inode *inode, const struct qstr *name) | 833 | struct page *new_inode_page(struct inode *inode, const struct qstr *name) |
@@ -843,19 +845,18 @@ struct page *new_node_page(struct dnode_of_data *dn, | |||
843 | unsigned int ofs, struct page *ipage) | 845 | unsigned int ofs, struct page *ipage) |
844 | { | 846 | { |
845 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 847 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
846 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
847 | struct node_info old_ni, new_ni; | 848 | struct node_info old_ni, new_ni; |
848 | struct page *page; | 849 | struct page *page; |
849 | int err; | 850 | int err; |
850 | 851 | ||
851 | if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) | 852 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
852 | return ERR_PTR(-EPERM); | 853 | return ERR_PTR(-EPERM); |
853 | 854 | ||
854 | page = grab_cache_page(mapping, dn->nid); | 855 | page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); |
855 | if (!page) | 856 | if (!page) |
856 | return ERR_PTR(-ENOMEM); | 857 | return ERR_PTR(-ENOMEM); |
857 | 858 | ||
858 | if (!inc_valid_node_count(sbi, dn->inode, 1)) { | 859 | if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { |
859 | err = -ENOSPC; | 860 | err = -ENOSPC; |
860 | goto fail; | 861 | goto fail; |
861 | } | 862 | } |
@@ -898,14 +899,14 @@ fail: | |||
898 | * LOCKED_PAGE: f2fs_put_page(page, 1) | 899 | * LOCKED_PAGE: f2fs_put_page(page, 1) |
899 | * error: nothing | 900 | * error: nothing |
900 | */ | 901 | */ |
901 | static int read_node_page(struct page *page, int type) | 902 | static int read_node_page(struct page *page, int rw) |
902 | { | 903 | { |
903 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 904 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); |
904 | struct node_info ni; | 905 | struct node_info ni; |
905 | 906 | ||
906 | get_node_info(sbi, page->index, &ni); | 907 | get_node_info(sbi, page->index, &ni); |
907 | 908 | ||
908 | if (ni.blk_addr == NULL_ADDR) { | 909 | if (unlikely(ni.blk_addr == NULL_ADDR)) { |
909 | f2fs_put_page(page, 1); | 910 | f2fs_put_page(page, 1); |
910 | return -ENOENT; | 911 | return -ENOENT; |
911 | } | 912 | } |
@@ -913,7 +914,7 @@ static int read_node_page(struct page *page, int type) | |||
913 | if (PageUptodate(page)) | 914 | if (PageUptodate(page)) |
914 | return LOCKED_PAGE; | 915 | return LOCKED_PAGE; |
915 | 916 | ||
916 | return f2fs_readpage(sbi, page, ni.blk_addr, type); | 917 | return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); |
917 | } | 918 | } |
918 | 919 | ||
919 | /* | 920 | /* |
@@ -921,18 +922,17 @@ static int read_node_page(struct page *page, int type) | |||
921 | */ | 922 | */ |
922 | void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) | 923 | void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) |
923 | { | 924 | { |
924 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
925 | struct page *apage; | 925 | struct page *apage; |
926 | int err; | 926 | int err; |
927 | 927 | ||
928 | apage = find_get_page(mapping, nid); | 928 | apage = find_get_page(NODE_MAPPING(sbi), nid); |
929 | if (apage && PageUptodate(apage)) { | 929 | if (apage && PageUptodate(apage)) { |
930 | f2fs_put_page(apage, 0); | 930 | f2fs_put_page(apage, 0); |
931 | return; | 931 | return; |
932 | } | 932 | } |
933 | f2fs_put_page(apage, 0); | 933 | f2fs_put_page(apage, 0); |
934 | 934 | ||
935 | apage = grab_cache_page(mapping, nid); | 935 | apage = grab_cache_page(NODE_MAPPING(sbi), nid); |
936 | if (!apage) | 936 | if (!apage) |
937 | return; | 937 | return; |
938 | 938 | ||
@@ -945,11 +945,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
945 | 945 | ||
946 | struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) | 946 | struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) |
947 | { | 947 | { |
948 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
949 | struct page *page; | 948 | struct page *page; |
950 | int err; | 949 | int err; |
951 | repeat: | 950 | repeat: |
952 | page = grab_cache_page(mapping, nid); | 951 | page = grab_cache_page(NODE_MAPPING(sbi), nid); |
953 | if (!page) | 952 | if (!page) |
954 | return ERR_PTR(-ENOMEM); | 953 | return ERR_PTR(-ENOMEM); |
955 | 954 | ||
@@ -960,11 +959,11 @@ repeat: | |||
960 | goto got_it; | 959 | goto got_it; |
961 | 960 | ||
962 | lock_page(page); | 961 | lock_page(page); |
963 | if (!PageUptodate(page)) { | 962 | if (unlikely(!PageUptodate(page))) { |
964 | f2fs_put_page(page, 1); | 963 | f2fs_put_page(page, 1); |
965 | return ERR_PTR(-EIO); | 964 | return ERR_PTR(-EIO); |
966 | } | 965 | } |
967 | if (page->mapping != mapping) { | 966 | if (unlikely(page->mapping != NODE_MAPPING(sbi))) { |
968 | f2fs_put_page(page, 1); | 967 | f2fs_put_page(page, 1); |
969 | goto repeat; | 968 | goto repeat; |
970 | } | 969 | } |
@@ -981,7 +980,6 @@ got_it: | |||
981 | struct page *get_node_page_ra(struct page *parent, int start) | 980 | struct page *get_node_page_ra(struct page *parent, int start) |
982 | { | 981 | { |
983 | struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); | 982 | struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); |
984 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
985 | struct blk_plug plug; | 983 | struct blk_plug plug; |
986 | struct page *page; | 984 | struct page *page; |
987 | int err, i, end; | 985 | int err, i, end; |
@@ -992,7 +990,7 @@ struct page *get_node_page_ra(struct page *parent, int start) | |||
992 | if (!nid) | 990 | if (!nid) |
993 | return ERR_PTR(-ENOENT); | 991 | return ERR_PTR(-ENOENT); |
994 | repeat: | 992 | repeat: |
995 | page = grab_cache_page(mapping, nid); | 993 | page = grab_cache_page(NODE_MAPPING(sbi), nid); |
996 | if (!page) | 994 | if (!page) |
997 | return ERR_PTR(-ENOMEM); | 995 | return ERR_PTR(-ENOMEM); |
998 | 996 | ||
@@ -1017,12 +1015,12 @@ repeat: | |||
1017 | blk_finish_plug(&plug); | 1015 | blk_finish_plug(&plug); |
1018 | 1016 | ||
1019 | lock_page(page); | 1017 | lock_page(page); |
1020 | if (page->mapping != mapping) { | 1018 | if (unlikely(page->mapping != NODE_MAPPING(sbi))) { |
1021 | f2fs_put_page(page, 1); | 1019 | f2fs_put_page(page, 1); |
1022 | goto repeat; | 1020 | goto repeat; |
1023 | } | 1021 | } |
1024 | page_hit: | 1022 | page_hit: |
1025 | if (!PageUptodate(page)) { | 1023 | if (unlikely(!PageUptodate(page))) { |
1026 | f2fs_put_page(page, 1); | 1024 | f2fs_put_page(page, 1); |
1027 | return ERR_PTR(-EIO); | 1025 | return ERR_PTR(-EIO); |
1028 | } | 1026 | } |
@@ -1048,7 +1046,6 @@ void sync_inode_page(struct dnode_of_data *dn) | |||
1048 | int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, | 1046 | int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, |
1049 | struct writeback_control *wbc) | 1047 | struct writeback_control *wbc) |
1050 | { | 1048 | { |
1051 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
1052 | pgoff_t index, end; | 1049 | pgoff_t index, end; |
1053 | struct pagevec pvec; | 1050 | struct pagevec pvec; |
1054 | int step = ino ? 2 : 0; | 1051 | int step = ino ? 2 : 0; |
@@ -1062,7 +1059,7 @@ next_step: | |||
1062 | 1059 | ||
1063 | while (index <= end) { | 1060 | while (index <= end) { |
1064 | int i, nr_pages; | 1061 | int i, nr_pages; |
1065 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 1062 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, |
1066 | PAGECACHE_TAG_DIRTY, | 1063 | PAGECACHE_TAG_DIRTY, |
1067 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 1064 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
1068 | if (nr_pages == 0) | 1065 | if (nr_pages == 0) |
@@ -1095,7 +1092,7 @@ next_step: | |||
1095 | else if (!trylock_page(page)) | 1092 | else if (!trylock_page(page)) |
1096 | continue; | 1093 | continue; |
1097 | 1094 | ||
1098 | if (unlikely(page->mapping != mapping)) { | 1095 | if (unlikely(page->mapping != NODE_MAPPING(sbi))) { |
1099 | continue_unlock: | 1096 | continue_unlock: |
1100 | unlock_page(page); | 1097 | unlock_page(page); |
1101 | continue; | 1098 | continue; |
@@ -1122,7 +1119,7 @@ continue_unlock: | |||
1122 | set_fsync_mark(page, 0); | 1119 | set_fsync_mark(page, 0); |
1123 | set_dentry_mark(page, 0); | 1120 | set_dentry_mark(page, 0); |
1124 | } | 1121 | } |
1125 | mapping->a_ops->writepage(page, wbc); | 1122 | NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); |
1126 | wrote++; | 1123 | wrote++; |
1127 | 1124 | ||
1128 | if (--wbc->nr_to_write == 0) | 1125 | if (--wbc->nr_to_write == 0) |
@@ -1143,31 +1140,31 @@ continue_unlock: | |||
1143 | } | 1140 | } |
1144 | 1141 | ||
1145 | if (wrote) | 1142 | if (wrote) |
1146 | f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); | 1143 | f2fs_submit_merged_bio(sbi, NODE, WRITE); |
1147 | |||
1148 | return nwritten; | 1144 | return nwritten; |
1149 | } | 1145 | } |
1150 | 1146 | ||
1151 | int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | 1147 | int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) |
1152 | { | 1148 | { |
1153 | struct address_space *mapping = sbi->node_inode->i_mapping; | ||
1154 | pgoff_t index = 0, end = LONG_MAX; | 1149 | pgoff_t index = 0, end = LONG_MAX; |
1155 | struct pagevec pvec; | 1150 | struct pagevec pvec; |
1156 | int nr_pages; | ||
1157 | int ret2 = 0, ret = 0; | 1151 | int ret2 = 0, ret = 0; |
1158 | 1152 | ||
1159 | pagevec_init(&pvec, 0); | 1153 | pagevec_init(&pvec, 0); |
1160 | while ((index <= end) && | 1154 | |
1161 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 1155 | while (index <= end) { |
1162 | PAGECACHE_TAG_WRITEBACK, | 1156 | int i, nr_pages; |
1163 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | 1157 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, |
1164 | unsigned i; | 1158 | PAGECACHE_TAG_WRITEBACK, |
1159 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1160 | if (nr_pages == 0) | ||
1161 | break; | ||
1165 | 1162 | ||
1166 | for (i = 0; i < nr_pages; i++) { | 1163 | for (i = 0; i < nr_pages; i++) { |
1167 | struct page *page = pvec.pages[i]; | 1164 | struct page *page = pvec.pages[i]; |
1168 | 1165 | ||
1169 | /* until radix tree lookup accepts end_index */ | 1166 | /* until radix tree lookup accepts end_index */ |
1170 | if (page->index > end) | 1167 | if (unlikely(page->index > end)) |
1171 | continue; | 1168 | continue; |
1172 | 1169 | ||
1173 | if (ino && ino_of_node(page) == ino) { | 1170 | if (ino && ino_of_node(page) == ino) { |
@@ -1180,9 +1177,9 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | |||
1180 | cond_resched(); | 1177 | cond_resched(); |
1181 | } | 1178 | } |
1182 | 1179 | ||
1183 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | 1180 | if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags))) |
1184 | ret2 = -ENOSPC; | 1181 | ret2 = -ENOSPC; |
1185 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | 1182 | if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags))) |
1186 | ret2 = -EIO; | 1183 | ret2 = -EIO; |
1187 | if (!ret) | 1184 | if (!ret) |
1188 | ret = ret2; | 1185 | ret = ret2; |
@@ -1196,8 +1193,12 @@ static int f2fs_write_node_page(struct page *page, | |||
1196 | nid_t nid; | 1193 | nid_t nid; |
1197 | block_t new_addr; | 1194 | block_t new_addr; |
1198 | struct node_info ni; | 1195 | struct node_info ni; |
1196 | struct f2fs_io_info fio = { | ||
1197 | .type = NODE, | ||
1198 | .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, | ||
1199 | }; | ||
1199 | 1200 | ||
1200 | if (sbi->por_doing) | 1201 | if (unlikely(sbi->por_doing)) |
1201 | goto redirty_out; | 1202 | goto redirty_out; |
1202 | 1203 | ||
1203 | wait_on_page_writeback(page); | 1204 | wait_on_page_writeback(page); |
@@ -1209,7 +1210,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1209 | get_node_info(sbi, nid, &ni); | 1210 | get_node_info(sbi, nid, &ni); |
1210 | 1211 | ||
1211 | /* This page is already truncated */ | 1212 | /* This page is already truncated */ |
1212 | if (ni.blk_addr == NULL_ADDR) { | 1213 | if (unlikely(ni.blk_addr == NULL_ADDR)) { |
1213 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1214 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1214 | unlock_page(page); | 1215 | unlock_page(page); |
1215 | return 0; | 1216 | return 0; |
@@ -1220,7 +1221,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1220 | 1221 | ||
1221 | mutex_lock(&sbi->node_write); | 1222 | mutex_lock(&sbi->node_write); |
1222 | set_page_writeback(page); | 1223 | set_page_writeback(page); |
1223 | write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); | 1224 | write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); |
1224 | set_node_addr(sbi, &ni, new_addr); | 1225 | set_node_addr(sbi, &ni, new_addr); |
1225 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1226 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1226 | mutex_unlock(&sbi->node_write); | 1227 | mutex_unlock(&sbi->node_write); |
@@ -1255,6 +1256,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, | |||
1255 | 1256 | ||
1256 | /* if mounting is failed, skip writing node pages */ | 1257 | /* if mounting is failed, skip writing node pages */ |
1257 | wbc->nr_to_write = 3 * max_hw_blocks(sbi); | 1258 | wbc->nr_to_write = 3 * max_hw_blocks(sbi); |
1259 | wbc->sync_mode = WB_SYNC_NONE; | ||
1258 | sync_node_pages(sbi, 0, wbc); | 1260 | sync_node_pages(sbi, 0, wbc); |
1259 | wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - | 1261 | wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - |
1260 | wbc->nr_to_write); | 1262 | wbc->nr_to_write); |
@@ -1333,7 +1335,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |||
1333 | return -1; | 1335 | return -1; |
1334 | 1336 | ||
1335 | /* 0 nid should not be used */ | 1337 | /* 0 nid should not be used */ |
1336 | if (nid == 0) | 1338 | if (unlikely(nid == 0)) |
1337 | return 0; | 1339 | return 0; |
1338 | 1340 | ||
1339 | if (build) { | 1341 | if (build) { |
@@ -1386,7 +1388,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, | |||
1386 | 1388 | ||
1387 | for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { | 1389 | for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { |
1388 | 1390 | ||
1389 | if (start_nid >= nm_i->max_nid) | 1391 | if (unlikely(start_nid >= nm_i->max_nid)) |
1390 | break; | 1392 | break; |
1391 | 1393 | ||
1392 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); | 1394 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); |
@@ -1420,7 +1422,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) | |||
1420 | f2fs_put_page(page, 1); | 1422 | f2fs_put_page(page, 1); |
1421 | 1423 | ||
1422 | nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); | 1424 | nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); |
1423 | if (nid >= nm_i->max_nid) | 1425 | if (unlikely(nid >= nm_i->max_nid)) |
1424 | nid = 0; | 1426 | nid = 0; |
1425 | 1427 | ||
1426 | if (i++ == FREE_NID_PAGES) | 1428 | if (i++ == FREE_NID_PAGES) |
@@ -1454,7 +1456,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) | |||
1454 | struct free_nid *i = NULL; | 1456 | struct free_nid *i = NULL; |
1455 | struct list_head *this; | 1457 | struct list_head *this; |
1456 | retry: | 1458 | retry: |
1457 | if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) | 1459 | if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) |
1458 | return false; | 1460 | return false; |
1459 | 1461 | ||
1460 | spin_lock(&nm_i->free_nid_list_lock); | 1462 | spin_lock(&nm_i->free_nid_list_lock); |
@@ -1535,13 +1537,12 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, | |||
1535 | 1537 | ||
1536 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | 1538 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) |
1537 | { | 1539 | { |
1538 | struct address_space *mapping = sbi->node_inode->i_mapping; | 1540 | struct f2fs_inode *src, *dst; |
1539 | struct f2fs_node *src, *dst; | ||
1540 | nid_t ino = ino_of_node(page); | 1541 | nid_t ino = ino_of_node(page); |
1541 | struct node_info old_ni, new_ni; | 1542 | struct node_info old_ni, new_ni; |
1542 | struct page *ipage; | 1543 | struct page *ipage; |
1543 | 1544 | ||
1544 | ipage = grab_cache_page(mapping, ino); | 1545 | ipage = grab_cache_page(NODE_MAPPING(sbi), ino); |
1545 | if (!ipage) | 1546 | if (!ipage) |
1546 | return -ENOMEM; | 1547 | return -ENOMEM; |
1547 | 1548 | ||
@@ -1552,19 +1553,19 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1552 | SetPageUptodate(ipage); | 1553 | SetPageUptodate(ipage); |
1553 | fill_node_footer(ipage, ino, ino, 0, true); | 1554 | fill_node_footer(ipage, ino, ino, 0, true); |
1554 | 1555 | ||
1555 | src = F2FS_NODE(page); | 1556 | src = F2FS_INODE(page); |
1556 | dst = F2FS_NODE(ipage); | 1557 | dst = F2FS_INODE(ipage); |
1557 | 1558 | ||
1558 | memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); | 1559 | memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); |
1559 | dst->i.i_size = 0; | 1560 | dst->i_size = 0; |
1560 | dst->i.i_blocks = cpu_to_le64(1); | 1561 | dst->i_blocks = cpu_to_le64(1); |
1561 | dst->i.i_links = cpu_to_le32(1); | 1562 | dst->i_links = cpu_to_le32(1); |
1562 | dst->i.i_xattr_nid = 0; | 1563 | dst->i_xattr_nid = 0; |
1563 | 1564 | ||
1564 | new_ni = old_ni; | 1565 | new_ni = old_ni; |
1565 | new_ni.ino = ino; | 1566 | new_ni.ino = ino; |
1566 | 1567 | ||
1567 | if (!inc_valid_node_count(sbi, NULL, 1)) | 1568 | if (unlikely(!inc_valid_node_count(sbi, NULL))) |
1568 | WARN_ON(1); | 1569 | WARN_ON(1); |
1569 | set_node_addr(sbi, &new_ni, NEW_ADDR); | 1570 | set_node_addr(sbi, &new_ni, NEW_ADDR); |
1570 | inc_valid_inode_count(sbi); | 1571 | inc_valid_inode_count(sbi); |
@@ -1572,47 +1573,88 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1572 | return 0; | 1573 | return 0; |
1573 | } | 1574 | } |
1574 | 1575 | ||
1576 | /* | ||
1577 | * ra_sum_pages() merge contiguous pages into one bio and submit. | ||
1578 | * these pre-readed pages are linked in pages list. | ||
1579 | */ | ||
1580 | static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, | ||
1581 | int start, int nrpages) | ||
1582 | { | ||
1583 | struct page *page; | ||
1584 | int page_idx = start; | ||
1585 | struct f2fs_io_info fio = { | ||
1586 | .type = META, | ||
1587 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
1588 | }; | ||
1589 | |||
1590 | for (; page_idx < start + nrpages; page_idx++) { | ||
1591 | /* alloc temporal page for read node summary info*/ | ||
1592 | page = alloc_page(GFP_F2FS_ZERO); | ||
1593 | if (!page) { | ||
1594 | struct page *tmp; | ||
1595 | list_for_each_entry_safe(page, tmp, pages, lru) { | ||
1596 | list_del(&page->lru); | ||
1597 | unlock_page(page); | ||
1598 | __free_pages(page, 0); | ||
1599 | } | ||
1600 | return -ENOMEM; | ||
1601 | } | ||
1602 | |||
1603 | lock_page(page); | ||
1604 | page->index = page_idx; | ||
1605 | list_add_tail(&page->lru, pages); | ||
1606 | } | ||
1607 | |||
1608 | list_for_each_entry(page, pages, lru) | ||
1609 | f2fs_submit_page_mbio(sbi, page, page->index, &fio); | ||
1610 | |||
1611 | f2fs_submit_merged_bio(sbi, META, READ); | ||
1612 | return 0; | ||
1613 | } | ||
1614 | |||
1575 | int restore_node_summary(struct f2fs_sb_info *sbi, | 1615 | int restore_node_summary(struct f2fs_sb_info *sbi, |
1576 | unsigned int segno, struct f2fs_summary_block *sum) | 1616 | unsigned int segno, struct f2fs_summary_block *sum) |
1577 | { | 1617 | { |
1578 | struct f2fs_node *rn; | 1618 | struct f2fs_node *rn; |
1579 | struct f2fs_summary *sum_entry; | 1619 | struct f2fs_summary *sum_entry; |
1580 | struct page *page; | 1620 | struct page *page, *tmp; |
1581 | block_t addr; | 1621 | block_t addr; |
1582 | int i, last_offset; | 1622 | int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); |
1583 | 1623 | int i, last_offset, nrpages, err = 0; | |
1584 | /* alloc temporal page for read node */ | 1624 | LIST_HEAD(page_list); |
1585 | page = alloc_page(GFP_NOFS | __GFP_ZERO); | ||
1586 | if (!page) | ||
1587 | return -ENOMEM; | ||
1588 | lock_page(page); | ||
1589 | 1625 | ||
1590 | /* scan the node segment */ | 1626 | /* scan the node segment */ |
1591 | last_offset = sbi->blocks_per_seg; | 1627 | last_offset = sbi->blocks_per_seg; |
1592 | addr = START_BLOCK(sbi, segno); | 1628 | addr = START_BLOCK(sbi, segno); |
1593 | sum_entry = &sum->entries[0]; | 1629 | sum_entry = &sum->entries[0]; |
1594 | 1630 | ||
1595 | for (i = 0; i < last_offset; i++, sum_entry++) { | 1631 | for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { |
1596 | /* | 1632 | nrpages = min(last_offset - i, bio_blocks); |
1597 | * In order to read next node page, | ||
1598 | * we must clear PageUptodate flag. | ||
1599 | */ | ||
1600 | ClearPageUptodate(page); | ||
1601 | 1633 | ||
1602 | if (f2fs_readpage(sbi, page, addr, READ_SYNC)) | 1634 | /* read ahead node pages */ |
1603 | goto out; | 1635 | err = ra_sum_pages(sbi, &page_list, addr, nrpages); |
1636 | if (err) | ||
1637 | return err; | ||
1604 | 1638 | ||
1605 | lock_page(page); | 1639 | list_for_each_entry_safe(page, tmp, &page_list, lru) { |
1606 | rn = F2FS_NODE(page); | 1640 | |
1607 | sum_entry->nid = rn->footer.nid; | 1641 | lock_page(page); |
1608 | sum_entry->version = 0; | 1642 | if (unlikely(!PageUptodate(page))) { |
1609 | sum_entry->ofs_in_node = 0; | 1643 | err = -EIO; |
1610 | addr++; | 1644 | } else { |
1645 | rn = F2FS_NODE(page); | ||
1646 | sum_entry->nid = rn->footer.nid; | ||
1647 | sum_entry->version = 0; | ||
1648 | sum_entry->ofs_in_node = 0; | ||
1649 | sum_entry++; | ||
1650 | } | ||
1651 | |||
1652 | list_del(&page->lru); | ||
1653 | unlock_page(page); | ||
1654 | __free_pages(page, 0); | ||
1655 | } | ||
1611 | } | 1656 | } |
1612 | unlock_page(page); | 1657 | return err; |
1613 | out: | ||
1614 | __free_pages(page, 0); | ||
1615 | return 0; | ||
1616 | } | 1658 | } |
1617 | 1659 | ||
1618 | static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) | 1660 | static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3496bb3e15dc..c4c79885c993 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -224,7 +224,13 @@ static inline block_t next_blkaddr_of_node(struct page *node_page) | |||
224 | * | `- direct node (5 + N => 5 + 2N - 1) | 224 | * | `- direct node (5 + N => 5 + 2N - 1) |
225 | * `- double indirect node (5 + 2N) | 225 | * `- double indirect node (5 + 2N) |
226 | * `- indirect node (6 + 2N) | 226 | * `- indirect node (6 + 2N) |
227 | * `- direct node (x(N + 1)) | 227 | * `- direct node |
228 | * ...... | ||
229 | * `- indirect node ((6 + 2N) + x(N + 1)) | ||
230 | * `- direct node | ||
231 | * ...... | ||
232 | * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) | ||
233 | * `- direct node | ||
228 | */ | 234 | */ |
229 | static inline bool IS_DNODE(struct page *node_page) | 235 | static inline bool IS_DNODE(struct page *node_page) |
230 | { | 236 | { |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fdc81161f254..976a7a934db5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |||
40 | 40 | ||
41 | static int recover_dentry(struct page *ipage, struct inode *inode) | 41 | static int recover_dentry(struct page *ipage, struct inode *inode) |
42 | { | 42 | { |
43 | struct f2fs_node *raw_node = F2FS_NODE(ipage); | 43 | struct f2fs_inode *raw_inode = F2FS_INODE(ipage); |
44 | struct f2fs_inode *raw_inode = &(raw_node->i); | ||
45 | nid_t pino = le32_to_cpu(raw_inode->i_pino); | 44 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
46 | struct f2fs_dir_entry *de; | 45 | struct f2fs_dir_entry *de; |
47 | struct qstr name; | 46 | struct qstr name; |
@@ -62,6 +61,12 @@ static int recover_dentry(struct page *ipage, struct inode *inode) | |||
62 | 61 | ||
63 | name.len = le32_to_cpu(raw_inode->i_namelen); | 62 | name.len = le32_to_cpu(raw_inode->i_namelen); |
64 | name.name = raw_inode->i_name; | 63 | name.name = raw_inode->i_name; |
64 | |||
65 | if (unlikely(name.len > F2FS_NAME_LEN)) { | ||
66 | WARN_ON(1); | ||
67 | err = -ENAMETOOLONG; | ||
68 | goto out; | ||
69 | } | ||
65 | retry: | 70 | retry: |
66 | de = f2fs_find_entry(dir, &name, &page); | 71 | de = f2fs_find_entry(dir, &name, &page); |
67 | if (de && inode->i_ino == le32_to_cpu(de->ino)) | 72 | if (de && inode->i_ino == le32_to_cpu(de->ino)) |
@@ -90,17 +95,16 @@ out_unmap_put: | |||
90 | kunmap(page); | 95 | kunmap(page); |
91 | f2fs_put_page(page, 0); | 96 | f2fs_put_page(page, 0); |
92 | out: | 97 | out: |
93 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " | 98 | f2fs_msg(inode->i_sb, KERN_NOTICE, |
94 | "ino = %x, name = %s, dir = %lx, err = %d", | 99 | "%s: ino = %x, name = %s, dir = %lx, err = %d", |
95 | ino_of_node(ipage), raw_inode->i_name, | 100 | __func__, ino_of_node(ipage), raw_inode->i_name, |
96 | IS_ERR(dir) ? 0 : dir->i_ino, err); | 101 | IS_ERR(dir) ? 0 : dir->i_ino, err); |
97 | return err; | 102 | return err; |
98 | } | 103 | } |
99 | 104 | ||
100 | static int recover_inode(struct inode *inode, struct page *node_page) | 105 | static int recover_inode(struct inode *inode, struct page *node_page) |
101 | { | 106 | { |
102 | struct f2fs_node *raw_node = F2FS_NODE(node_page); | 107 | struct f2fs_inode *raw_inode = F2FS_INODE(node_page); |
103 | struct f2fs_inode *raw_inode = &(raw_node->i); | ||
104 | 108 | ||
105 | if (!IS_INODE(node_page)) | 109 | if (!IS_INODE(node_page)) |
106 | return 0; | 110 | return 0; |
@@ -143,9 +147,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
143 | while (1) { | 147 | while (1) { |
144 | struct fsync_inode_entry *entry; | 148 | struct fsync_inode_entry *entry; |
145 | 149 | ||
146 | err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); | 150 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); |
147 | if (err) | 151 | if (err) |
148 | goto out; | 152 | return err; |
149 | 153 | ||
150 | lock_page(page); | 154 | lock_page(page); |
151 | 155 | ||
@@ -191,9 +195,10 @@ next: | |||
191 | /* check next segment */ | 195 | /* check next segment */ |
192 | blkaddr = next_blkaddr_of_node(page); | 196 | blkaddr = next_blkaddr_of_node(page); |
193 | } | 197 | } |
198 | |||
194 | unlock_page(page); | 199 | unlock_page(page); |
195 | out: | ||
196 | __free_pages(page, 0); | 200 | __free_pages(page, 0); |
201 | |||
197 | return err; | 202 | return err; |
198 | } | 203 | } |
199 | 204 | ||
@@ -293,6 +298,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
293 | struct node_info ni; | 298 | struct node_info ni; |
294 | int err = 0, recovered = 0; | 299 | int err = 0, recovered = 0; |
295 | 300 | ||
301 | if (recover_inline_data(inode, page)) | ||
302 | goto out; | ||
303 | |||
296 | start = start_bidx_of_node(ofs_of_node(page), fi); | 304 | start = start_bidx_of_node(ofs_of_node(page), fi); |
297 | if (IS_INODE(page)) | 305 | if (IS_INODE(page)) |
298 | end = start + ADDRS_PER_INODE(fi); | 306 | end = start + ADDRS_PER_INODE(fi); |
@@ -300,12 +308,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
300 | end = start + ADDRS_PER_BLOCK; | 308 | end = start + ADDRS_PER_BLOCK; |
301 | 309 | ||
302 | f2fs_lock_op(sbi); | 310 | f2fs_lock_op(sbi); |
311 | |||
303 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 312 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
304 | 313 | ||
305 | err = get_dnode_of_data(&dn, start, ALLOC_NODE); | 314 | err = get_dnode_of_data(&dn, start, ALLOC_NODE); |
306 | if (err) { | 315 | if (err) { |
307 | f2fs_unlock_op(sbi); | 316 | f2fs_unlock_op(sbi); |
308 | return err; | 317 | goto out; |
309 | } | 318 | } |
310 | 319 | ||
311 | wait_on_page_writeback(dn.node_page); | 320 | wait_on_page_writeback(dn.node_page); |
@@ -356,10 +365,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
356 | err: | 365 | err: |
357 | f2fs_put_dnode(&dn); | 366 | f2fs_put_dnode(&dn); |
358 | f2fs_unlock_op(sbi); | 367 | f2fs_unlock_op(sbi); |
359 | 368 | out: | |
360 | f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " | 369 | f2fs_msg(sbi->sb, KERN_NOTICE, |
361 | "recovered_data = %d blocks, err = %d", | 370 | "recover_data: ino = %lx, recovered = %d blocks, err = %d", |
362 | inode->i_ino, recovered, err); | 371 | inode->i_ino, recovered, err); |
363 | return err; | 372 | return err; |
364 | } | 373 | } |
365 | 374 | ||
@@ -377,7 +386,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
377 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 386 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
378 | 387 | ||
379 | /* read node page */ | 388 | /* read node page */ |
380 | page = alloc_page(GFP_NOFS | __GFP_ZERO); | 389 | page = alloc_page(GFP_F2FS_ZERO); |
381 | if (!page) | 390 | if (!page) |
382 | return -ENOMEM; | 391 | return -ENOMEM; |
383 | 392 | ||
@@ -386,9 +395,9 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
386 | while (1) { | 395 | while (1) { |
387 | struct fsync_inode_entry *entry; | 396 | struct fsync_inode_entry *entry; |
388 | 397 | ||
389 | err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); | 398 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); |
390 | if (err) | 399 | if (err) |
391 | goto out; | 400 | return err; |
392 | 401 | ||
393 | lock_page(page); | 402 | lock_page(page); |
394 | 403 | ||
@@ -412,8 +421,8 @@ next: | |||
412 | /* check next segment */ | 421 | /* check next segment */ |
413 | blkaddr = next_blkaddr_of_node(page); | 422 | blkaddr = next_blkaddr_of_node(page); |
414 | } | 423 | } |
424 | |||
415 | unlock_page(page); | 425 | unlock_page(page); |
416 | out: | ||
417 | __free_pages(page, 0); | 426 | __free_pages(page, 0); |
418 | 427 | ||
419 | if (!err) | 428 | if (!err) |
@@ -429,7 +438,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
429 | 438 | ||
430 | fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", | 439 | fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", |
431 | sizeof(struct fsync_inode_entry), NULL); | 440 | sizeof(struct fsync_inode_entry), NULL); |
432 | if (unlikely(!fsync_entry_slab)) | 441 | if (!fsync_entry_slab) |
433 | return -ENOMEM; | 442 | return -ENOMEM; |
434 | 443 | ||
435 | INIT_LIST_HEAD(&inode_list); | 444 | INIT_LIST_HEAD(&inode_list); |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa284d397199..7caac5f2ca9e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -14,12 +14,163 @@ | |||
14 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
15 | #include <linux/prefetch.h> | 15 | #include <linux/prefetch.h> |
16 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
17 | #include <linux/swap.h> | ||
17 | 18 | ||
18 | #include "f2fs.h" | 19 | #include "f2fs.h" |
19 | #include "segment.h" | 20 | #include "segment.h" |
20 | #include "node.h" | 21 | #include "node.h" |
21 | #include <trace/events/f2fs.h> | 22 | #include <trace/events/f2fs.h> |
22 | 23 | ||
24 | #define __reverse_ffz(x) __reverse_ffs(~(x)) | ||
25 | |||
26 | static struct kmem_cache *discard_entry_slab; | ||
27 | |||
28 | /* | ||
29 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since | ||
30 | * MSB and LSB are reversed in a byte by f2fs_set_bit. | ||
31 | */ | ||
32 | static inline unsigned long __reverse_ffs(unsigned long word) | ||
33 | { | ||
34 | int num = 0; | ||
35 | |||
36 | #if BITS_PER_LONG == 64 | ||
37 | if ((word & 0xffffffff) == 0) { | ||
38 | num += 32; | ||
39 | word >>= 32; | ||
40 | } | ||
41 | #endif | ||
42 | if ((word & 0xffff) == 0) { | ||
43 | num += 16; | ||
44 | word >>= 16; | ||
45 | } | ||
46 | if ((word & 0xff) == 0) { | ||
47 | num += 8; | ||
48 | word >>= 8; | ||
49 | } | ||
50 | if ((word & 0xf0) == 0) | ||
51 | num += 4; | ||
52 | else | ||
53 | word >>= 4; | ||
54 | if ((word & 0xc) == 0) | ||
55 | num += 2; | ||
56 | else | ||
57 | word >>= 2; | ||
58 | if ((word & 0x2) == 0) | ||
59 | num += 1; | ||
60 | return num; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue | ||
65 | * f2fs_set_bit makes MSB and LSB reversed in a byte. | ||
66 | * Example: | ||
67 | * LSB <--> MSB | ||
68 | * f2fs_set_bit(0, bitmap) => 0000 0001 | ||
69 | * f2fs_set_bit(7, bitmap) => 1000 0000 | ||
70 | */ | ||
71 | static unsigned long __find_rev_next_bit(const unsigned long *addr, | ||
72 | unsigned long size, unsigned long offset) | ||
73 | { | ||
74 | const unsigned long *p = addr + BIT_WORD(offset); | ||
75 | unsigned long result = offset & ~(BITS_PER_LONG - 1); | ||
76 | unsigned long tmp; | ||
77 | unsigned long mask, submask; | ||
78 | unsigned long quot, rest; | ||
79 | |||
80 | if (offset >= size) | ||
81 | return size; | ||
82 | |||
83 | size -= result; | ||
84 | offset %= BITS_PER_LONG; | ||
85 | if (!offset) | ||
86 | goto aligned; | ||
87 | |||
88 | tmp = *(p++); | ||
89 | quot = (offset >> 3) << 3; | ||
90 | rest = offset & 0x7; | ||
91 | mask = ~0UL << quot; | ||
92 | submask = (unsigned char)(0xff << rest) >> rest; | ||
93 | submask <<= quot; | ||
94 | mask &= submask; | ||
95 | tmp &= mask; | ||
96 | if (size < BITS_PER_LONG) | ||
97 | goto found_first; | ||
98 | if (tmp) | ||
99 | goto found_middle; | ||
100 | |||
101 | size -= BITS_PER_LONG; | ||
102 | result += BITS_PER_LONG; | ||
103 | aligned: | ||
104 | while (size & ~(BITS_PER_LONG-1)) { | ||
105 | tmp = *(p++); | ||
106 | if (tmp) | ||
107 | goto found_middle; | ||
108 | result += BITS_PER_LONG; | ||
109 | size -= BITS_PER_LONG; | ||
110 | } | ||
111 | if (!size) | ||
112 | return result; | ||
113 | tmp = *p; | ||
114 | found_first: | ||
115 | tmp &= (~0UL >> (BITS_PER_LONG - size)); | ||
116 | if (tmp == 0UL) /* Are any bits set? */ | ||
117 | return result + size; /* Nope. */ | ||
118 | found_middle: | ||
119 | return result + __reverse_ffs(tmp); | ||
120 | } | ||
121 | |||
122 | static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, | ||
123 | unsigned long size, unsigned long offset) | ||
124 | { | ||
125 | const unsigned long *p = addr + BIT_WORD(offset); | ||
126 | unsigned long result = offset & ~(BITS_PER_LONG - 1); | ||
127 | unsigned long tmp; | ||
128 | unsigned long mask, submask; | ||
129 | unsigned long quot, rest; | ||
130 | |||
131 | if (offset >= size) | ||
132 | return size; | ||
133 | |||
134 | size -= result; | ||
135 | offset %= BITS_PER_LONG; | ||
136 | if (!offset) | ||
137 | goto aligned; | ||
138 | |||
139 | tmp = *(p++); | ||
140 | quot = (offset >> 3) << 3; | ||
141 | rest = offset & 0x7; | ||
142 | mask = ~(~0UL << quot); | ||
143 | submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest); | ||
144 | submask <<= quot; | ||
145 | mask += submask; | ||
146 | tmp |= mask; | ||
147 | if (size < BITS_PER_LONG) | ||
148 | goto found_first; | ||
149 | if (~tmp) | ||
150 | goto found_middle; | ||
151 | |||
152 | size -= BITS_PER_LONG; | ||
153 | result += BITS_PER_LONG; | ||
154 | aligned: | ||
155 | while (size & ~(BITS_PER_LONG - 1)) { | ||
156 | tmp = *(p++); | ||
157 | if (~tmp) | ||
158 | goto found_middle; | ||
159 | result += BITS_PER_LONG; | ||
160 | size -= BITS_PER_LONG; | ||
161 | } | ||
162 | if (!size) | ||
163 | return result; | ||
164 | tmp = *p; | ||
165 | |||
166 | found_first: | ||
167 | tmp |= ~0UL << size; | ||
168 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
169 | return result + size; /* Nope. */ | ||
170 | found_middle: | ||
171 | return result + __reverse_ffz(tmp); | ||
172 | } | ||
173 | |||
23 | /* | 174 | /* |
24 | * This function balances dirty node and dentry pages. | 175 | * This function balances dirty node and dentry pages. |
25 | * In addition, it controls garbage collection. | 176 | * In addition, it controls garbage collection. |
@@ -116,6 +267,56 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |||
116 | mutex_unlock(&dirty_i->seglist_lock); | 267 | mutex_unlock(&dirty_i->seglist_lock); |
117 | } | 268 | } |
118 | 269 | ||
270 | static void f2fs_issue_discard(struct f2fs_sb_info *sbi, | ||
271 | block_t blkstart, block_t blklen) | ||
272 | { | ||
273 | sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); | ||
274 | sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); | ||
275 | blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); | ||
276 | trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); | ||
277 | } | ||
278 | |||
279 | static void add_discard_addrs(struct f2fs_sb_info *sbi, | ||
280 | unsigned int segno, struct seg_entry *se) | ||
281 | { | ||
282 | struct list_head *head = &SM_I(sbi)->discard_list; | ||
283 | struct discard_entry *new; | ||
284 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); | ||
285 | int max_blocks = sbi->blocks_per_seg; | ||
286 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | ||
287 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | ||
288 | unsigned long dmap[entries]; | ||
289 | unsigned int start = 0, end = -1; | ||
290 | int i; | ||
291 | |||
292 | if (!test_opt(sbi, DISCARD)) | ||
293 | return; | ||
294 | |||
295 | /* zero block will be discarded through the prefree list */ | ||
296 | if (!se->valid_blocks || se->valid_blocks == max_blocks) | ||
297 | return; | ||
298 | |||
299 | /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ | ||
300 | for (i = 0; i < entries; i++) | ||
301 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; | ||
302 | |||
303 | while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { | ||
304 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); | ||
305 | if (start >= max_blocks) | ||
306 | break; | ||
307 | |||
308 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); | ||
309 | |||
310 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | ||
311 | INIT_LIST_HEAD(&new->list); | ||
312 | new->blkaddr = START_BLOCK(sbi, segno) + start; | ||
313 | new->len = end - start; | ||
314 | |||
315 | list_add_tail(&new->list, head); | ||
316 | SM_I(sbi)->nr_discards += end - start; | ||
317 | } | ||
318 | } | ||
319 | |||
119 | /* | 320 | /* |
120 | * Should call clear_prefree_segments after checkpoint is done. | 321 | * Should call clear_prefree_segments after checkpoint is done. |
121 | */ | 322 | */ |
@@ -138,6 +339,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
138 | 339 | ||
139 | void clear_prefree_segments(struct f2fs_sb_info *sbi) | 340 | void clear_prefree_segments(struct f2fs_sb_info *sbi) |
140 | { | 341 | { |
342 | struct list_head *head = &(SM_I(sbi)->discard_list); | ||
343 | struct list_head *this, *next; | ||
344 | struct discard_entry *entry; | ||
141 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 345 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
142 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; | 346 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; |
143 | unsigned int total_segs = TOTAL_SEGS(sbi); | 347 | unsigned int total_segs = TOTAL_SEGS(sbi); |
@@ -160,14 +364,19 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
160 | if (!test_opt(sbi, DISCARD)) | 364 | if (!test_opt(sbi, DISCARD)) |
161 | continue; | 365 | continue; |
162 | 366 | ||
163 | blkdev_issue_discard(sbi->sb->s_bdev, | 367 | f2fs_issue_discard(sbi, START_BLOCK(sbi, start), |
164 | START_BLOCK(sbi, start) << | 368 | (end - start) << sbi->log_blocks_per_seg); |
165 | sbi->log_sectors_per_block, | ||
166 | (1 << (sbi->log_sectors_per_block + | ||
167 | sbi->log_blocks_per_seg)) * (end - start), | ||
168 | GFP_NOFS, 0); | ||
169 | } | 369 | } |
170 | mutex_unlock(&dirty_i->seglist_lock); | 370 | mutex_unlock(&dirty_i->seglist_lock); |
371 | |||
372 | /* send small discards */ | ||
373 | list_for_each_safe(this, next, head) { | ||
374 | entry = list_entry(this, struct discard_entry, list); | ||
375 | f2fs_issue_discard(sbi, entry->blkaddr, entry->len); | ||
376 | list_del(&entry->list); | ||
377 | SM_I(sbi)->nr_discards -= entry->len; | ||
378 | kmem_cache_free(discard_entry_slab, entry); | ||
379 | } | ||
171 | } | 380 | } |
172 | 381 | ||
173 | static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) | 382 | static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) |
@@ -459,13 +668,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, | |||
459 | struct curseg_info *seg, block_t start) | 668 | struct curseg_info *seg, block_t start) |
460 | { | 669 | { |
461 | struct seg_entry *se = get_seg_entry(sbi, seg->segno); | 670 | struct seg_entry *se = get_seg_entry(sbi, seg->segno); |
462 | block_t ofs; | 671 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); |
463 | for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { | 672 | unsigned long target_map[entries]; |
464 | if (!f2fs_test_bit(ofs, se->ckpt_valid_map) | 673 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
465 | && !f2fs_test_bit(ofs, se->cur_valid_map)) | 674 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
466 | break; | 675 | int i, pos; |
467 | } | 676 | |
468 | seg->next_blkoff = ofs; | 677 | for (i = 0; i < entries; i++) |
678 | target_map[i] = ckpt_map[i] | cur_map[i]; | ||
679 | |||
680 | pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); | ||
681 | |||
682 | seg->next_blkoff = pos; | ||
469 | } | 683 | } |
470 | 684 | ||
471 | /* | 685 | /* |
@@ -573,148 +787,6 @@ static const struct segment_allocation default_salloc_ops = { | |||
573 | .allocate_segment = allocate_segment_by_default, | 787 | .allocate_segment = allocate_segment_by_default, |
574 | }; | 788 | }; |
575 | 789 | ||
576 | static void f2fs_end_io_write(struct bio *bio, int err) | ||
577 | { | ||
578 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
579 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
580 | struct bio_private *p = bio->bi_private; | ||
581 | |||
582 | do { | ||
583 | struct page *page = bvec->bv_page; | ||
584 | |||
585 | if (--bvec >= bio->bi_io_vec) | ||
586 | prefetchw(&bvec->bv_page->flags); | ||
587 | if (!uptodate) { | ||
588 | SetPageError(page); | ||
589 | if (page->mapping) | ||
590 | set_bit(AS_EIO, &page->mapping->flags); | ||
591 | set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); | ||
592 | p->sbi->sb->s_flags |= MS_RDONLY; | ||
593 | } | ||
594 | end_page_writeback(page); | ||
595 | dec_page_count(p->sbi, F2FS_WRITEBACK); | ||
596 | } while (bvec >= bio->bi_io_vec); | ||
597 | |||
598 | if (p->is_sync) | ||
599 | complete(p->wait); | ||
600 | |||
601 | if (!get_pages(p->sbi, F2FS_WRITEBACK) && | ||
602 | !list_empty(&p->sbi->cp_wait.task_list)) | ||
603 | wake_up(&p->sbi->cp_wait); | ||
604 | |||
605 | kfree(p); | ||
606 | bio_put(bio); | ||
607 | } | ||
608 | |||
609 | struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) | ||
610 | { | ||
611 | struct bio *bio; | ||
612 | |||
613 | /* No failure on bio allocation */ | ||
614 | bio = bio_alloc(GFP_NOIO, npages); | ||
615 | bio->bi_bdev = bdev; | ||
616 | bio->bi_private = NULL; | ||
617 | |||
618 | return bio; | ||
619 | } | ||
620 | |||
621 | static void do_submit_bio(struct f2fs_sb_info *sbi, | ||
622 | enum page_type type, bool sync) | ||
623 | { | ||
624 | int rw = sync ? WRITE_SYNC : WRITE; | ||
625 | enum page_type btype = type > META ? META : type; | ||
626 | |||
627 | if (type >= META_FLUSH) | ||
628 | rw = WRITE_FLUSH_FUA; | ||
629 | |||
630 | if (btype == META) | ||
631 | rw |= REQ_META; | ||
632 | |||
633 | if (sbi->bio[btype]) { | ||
634 | struct bio_private *p = sbi->bio[btype]->bi_private; | ||
635 | p->sbi = sbi; | ||
636 | sbi->bio[btype]->bi_end_io = f2fs_end_io_write; | ||
637 | |||
638 | trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); | ||
639 | |||
640 | if (type == META_FLUSH) { | ||
641 | DECLARE_COMPLETION_ONSTACK(wait); | ||
642 | p->is_sync = true; | ||
643 | p->wait = &wait; | ||
644 | submit_bio(rw, sbi->bio[btype]); | ||
645 | wait_for_completion(&wait); | ||
646 | } else { | ||
647 | p->is_sync = false; | ||
648 | submit_bio(rw, sbi->bio[btype]); | ||
649 | } | ||
650 | sbi->bio[btype] = NULL; | ||
651 | } | ||
652 | } | ||
653 | |||
654 | void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) | ||
655 | { | ||
656 | down_write(&sbi->bio_sem); | ||
657 | do_submit_bio(sbi, type, sync); | ||
658 | up_write(&sbi->bio_sem); | ||
659 | } | ||
660 | |||
661 | static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, | ||
662 | block_t blk_addr, enum page_type type) | ||
663 | { | ||
664 | struct block_device *bdev = sbi->sb->s_bdev; | ||
665 | int bio_blocks; | ||
666 | |||
667 | verify_block_addr(sbi, blk_addr); | ||
668 | |||
669 | down_write(&sbi->bio_sem); | ||
670 | |||
671 | inc_page_count(sbi, F2FS_WRITEBACK); | ||
672 | |||
673 | if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) | ||
674 | do_submit_bio(sbi, type, false); | ||
675 | alloc_new: | ||
676 | if (sbi->bio[type] == NULL) { | ||
677 | struct bio_private *priv; | ||
678 | retry: | ||
679 | priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); | ||
680 | if (!priv) { | ||
681 | cond_resched(); | ||
682 | goto retry; | ||
683 | } | ||
684 | |||
685 | bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | ||
686 | sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks); | ||
687 | sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | ||
688 | sbi->bio[type]->bi_private = priv; | ||
689 | /* | ||
690 | * The end_io will be assigned at the sumbission phase. | ||
691 | * Until then, let bio_add_page() merge consecutive IOs as much | ||
692 | * as possible. | ||
693 | */ | ||
694 | } | ||
695 | |||
696 | if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < | ||
697 | PAGE_CACHE_SIZE) { | ||
698 | do_submit_bio(sbi, type, false); | ||
699 | goto alloc_new; | ||
700 | } | ||
701 | |||
702 | sbi->last_block_in_bio[type] = blk_addr; | ||
703 | |||
704 | up_write(&sbi->bio_sem); | ||
705 | trace_f2fs_submit_write_page(page, blk_addr, type); | ||
706 | } | ||
707 | |||
708 | void f2fs_wait_on_page_writeback(struct page *page, | ||
709 | enum page_type type, bool sync) | ||
710 | { | ||
711 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | ||
712 | if (PageWriteback(page)) { | ||
713 | f2fs_submit_bio(sbi, type, sync); | ||
714 | wait_on_page_writeback(page); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) | 790 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) |
719 | { | 791 | { |
720 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 792 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
@@ -782,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type) | |||
782 | return __get_segment_type_6(page, p_type); | 854 | return __get_segment_type_6(page, p_type); |
783 | } | 855 | } |
784 | 856 | ||
785 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | 857 | void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, |
786 | block_t old_blkaddr, block_t *new_blkaddr, | 858 | block_t old_blkaddr, block_t *new_blkaddr, |
787 | struct f2fs_summary *sum, enum page_type p_type) | 859 | struct f2fs_summary *sum, int type) |
788 | { | 860 | { |
789 | struct sit_info *sit_i = SIT_I(sbi); | 861 | struct sit_info *sit_i = SIT_I(sbi); |
790 | struct curseg_info *curseg; | 862 | struct curseg_info *curseg; |
791 | unsigned int old_cursegno; | 863 | unsigned int old_cursegno; |
792 | int type; | ||
793 | 864 | ||
794 | type = __get_segment_type(page, p_type); | ||
795 | curseg = CURSEG_I(sbi, type); | 865 | curseg = CURSEG_I(sbi, type); |
796 | 866 | ||
797 | mutex_lock(&curseg->curseg_mutex); | 867 | mutex_lock(&curseg->curseg_mutex); |
@@ -824,49 +894,64 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | |||
824 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | 894 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); |
825 | mutex_unlock(&sit_i->sentry_lock); | 895 | mutex_unlock(&sit_i->sentry_lock); |
826 | 896 | ||
827 | if (p_type == NODE) | 897 | if (page && IS_NODESEG(type)) |
828 | fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); | 898 | fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); |
829 | 899 | ||
830 | /* writeout dirty page into bdev */ | ||
831 | submit_write_page(sbi, page, *new_blkaddr, p_type); | ||
832 | |||
833 | mutex_unlock(&curseg->curseg_mutex); | 900 | mutex_unlock(&curseg->curseg_mutex); |
834 | } | 901 | } |
835 | 902 | ||
903 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | ||
904 | block_t old_blkaddr, block_t *new_blkaddr, | ||
905 | struct f2fs_summary *sum, struct f2fs_io_info *fio) | ||
906 | { | ||
907 | int type = __get_segment_type(page, fio->type); | ||
908 | |||
909 | allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); | ||
910 | |||
911 | /* writeout dirty page into bdev */ | ||
912 | f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); | ||
913 | } | ||
914 | |||
836 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) | 915 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) |
837 | { | 916 | { |
917 | struct f2fs_io_info fio = { | ||
918 | .type = META, | ||
919 | .rw = WRITE_SYNC | REQ_META | REQ_PRIO | ||
920 | }; | ||
921 | |||
838 | set_page_writeback(page); | 922 | set_page_writeback(page); |
839 | submit_write_page(sbi, page, page->index, META); | 923 | f2fs_submit_page_mbio(sbi, page, page->index, &fio); |
840 | } | 924 | } |
841 | 925 | ||
842 | void write_node_page(struct f2fs_sb_info *sbi, struct page *page, | 926 | void write_node_page(struct f2fs_sb_info *sbi, struct page *page, |
927 | struct f2fs_io_info *fio, | ||
843 | unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) | 928 | unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) |
844 | { | 929 | { |
845 | struct f2fs_summary sum; | 930 | struct f2fs_summary sum; |
846 | set_summary(&sum, nid, 0, 0); | 931 | set_summary(&sum, nid, 0, 0); |
847 | do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); | 932 | do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); |
848 | } | 933 | } |
849 | 934 | ||
850 | void write_data_page(struct inode *inode, struct page *page, | 935 | void write_data_page(struct page *page, struct dnode_of_data *dn, |
851 | struct dnode_of_data *dn, block_t old_blkaddr, | 936 | block_t *new_blkaddr, struct f2fs_io_info *fio) |
852 | block_t *new_blkaddr) | ||
853 | { | 937 | { |
854 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 938 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
855 | struct f2fs_summary sum; | 939 | struct f2fs_summary sum; |
856 | struct node_info ni; | 940 | struct node_info ni; |
857 | 941 | ||
858 | f2fs_bug_on(old_blkaddr == NULL_ADDR); | 942 | f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); |
859 | get_node_info(sbi, dn->nid, &ni); | 943 | get_node_info(sbi, dn->nid, &ni); |
860 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 944 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
861 | 945 | ||
862 | do_write_page(sbi, page, old_blkaddr, | 946 | do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); |
863 | new_blkaddr, &sum, DATA); | ||
864 | } | 947 | } |
865 | 948 | ||
866 | void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, | 949 | void rewrite_data_page(struct page *page, block_t old_blkaddr, |
867 | block_t old_blk_addr) | 950 | struct f2fs_io_info *fio) |
868 | { | 951 | { |
869 | submit_write_page(sbi, page, old_blk_addr, DATA); | 952 | struct inode *inode = page->mapping->host; |
953 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
954 | f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); | ||
870 | } | 955 | } |
871 | 956 | ||
872 | void recover_data_page(struct f2fs_sb_info *sbi, | 957 | void recover_data_page(struct f2fs_sb_info *sbi, |
@@ -925,6 +1010,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
925 | unsigned int segno, old_cursegno; | 1010 | unsigned int segno, old_cursegno; |
926 | block_t next_blkaddr = next_blkaddr_of_node(page); | 1011 | block_t next_blkaddr = next_blkaddr_of_node(page); |
927 | unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); | 1012 | unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); |
1013 | struct f2fs_io_info fio = { | ||
1014 | .type = NODE, | ||
1015 | .rw = WRITE_SYNC, | ||
1016 | }; | ||
928 | 1017 | ||
929 | curseg = CURSEG_I(sbi, type); | 1018 | curseg = CURSEG_I(sbi, type); |
930 | 1019 | ||
@@ -953,8 +1042,8 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
953 | 1042 | ||
954 | /* rewrite node page */ | 1043 | /* rewrite node page */ |
955 | set_page_writeback(page); | 1044 | set_page_writeback(page); |
956 | submit_write_page(sbi, page, new_blkaddr, NODE); | 1045 | f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); |
957 | f2fs_submit_bio(sbi, NODE, true); | 1046 | f2fs_submit_merged_bio(sbi, NODE, WRITE); |
958 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | 1047 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); |
959 | 1048 | ||
960 | locate_dirty_segment(sbi, old_cursegno); | 1049 | locate_dirty_segment(sbi, old_cursegno); |
@@ -964,6 +1053,16 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
964 | mutex_unlock(&curseg->curseg_mutex); | 1053 | mutex_unlock(&curseg->curseg_mutex); |
965 | } | 1054 | } |
966 | 1055 | ||
1056 | void f2fs_wait_on_page_writeback(struct page *page, | ||
1057 | enum page_type type) | ||
1058 | { | ||
1059 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | ||
1060 | if (PageWriteback(page)) { | ||
1061 | f2fs_submit_merged_bio(sbi, type, WRITE); | ||
1062 | wait_on_page_writeback(page); | ||
1063 | } | ||
1064 | } | ||
1065 | |||
967 | static int read_compacted_summaries(struct f2fs_sb_info *sbi) | 1066 | static int read_compacted_summaries(struct f2fs_sb_info *sbi) |
968 | { | 1067 | { |
969 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 1068 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
@@ -1314,6 +1413,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) | |||
1314 | 1413 | ||
1315 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | 1414 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); |
1316 | 1415 | ||
1416 | /* add discard candidates */ | ||
1417 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) | ||
1418 | add_discard_addrs(sbi, segno, se); | ||
1419 | |||
1317 | if (flushed) | 1420 | if (flushed) |
1318 | goto to_sit_page; | 1421 | goto to_sit_page; |
1319 | 1422 | ||
@@ -1480,41 +1583,94 @@ static int build_curseg(struct f2fs_sb_info *sbi) | |||
1480 | return restore_curseg_summaries(sbi); | 1583 | return restore_curseg_summaries(sbi); |
1481 | } | 1584 | } |
1482 | 1585 | ||
1586 | static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) | ||
1587 | { | ||
1588 | struct address_space *mapping = META_MAPPING(sbi); | ||
1589 | struct page *page; | ||
1590 | block_t blk_addr, prev_blk_addr = 0; | ||
1591 | int sit_blk_cnt = SIT_BLK_CNT(sbi); | ||
1592 | int blkno = start; | ||
1593 | struct f2fs_io_info fio = { | ||
1594 | .type = META, | ||
1595 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
1596 | }; | ||
1597 | |||
1598 | for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { | ||
1599 | |||
1600 | blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); | ||
1601 | |||
1602 | if (blkno != start && prev_blk_addr + 1 != blk_addr) | ||
1603 | break; | ||
1604 | prev_blk_addr = blk_addr; | ||
1605 | repeat: | ||
1606 | page = grab_cache_page(mapping, blk_addr); | ||
1607 | if (!page) { | ||
1608 | cond_resched(); | ||
1609 | goto repeat; | ||
1610 | } | ||
1611 | if (PageUptodate(page)) { | ||
1612 | mark_page_accessed(page); | ||
1613 | f2fs_put_page(page, 1); | ||
1614 | continue; | ||
1615 | } | ||
1616 | |||
1617 | f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); | ||
1618 | |||
1619 | mark_page_accessed(page); | ||
1620 | f2fs_put_page(page, 0); | ||
1621 | } | ||
1622 | |||
1623 | f2fs_submit_merged_bio(sbi, META, READ); | ||
1624 | return blkno - start; | ||
1625 | } | ||
1626 | |||
1483 | static void build_sit_entries(struct f2fs_sb_info *sbi) | 1627 | static void build_sit_entries(struct f2fs_sb_info *sbi) |
1484 | { | 1628 | { |
1485 | struct sit_info *sit_i = SIT_I(sbi); | 1629 | struct sit_info *sit_i = SIT_I(sbi); |
1486 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | 1630 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); |
1487 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1631 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1488 | unsigned int start; | 1632 | int sit_blk_cnt = SIT_BLK_CNT(sbi); |
1633 | unsigned int i, start, end; | ||
1634 | unsigned int readed, start_blk = 0; | ||
1635 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | ||
1489 | 1636 | ||
1490 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 1637 | do { |
1491 | struct seg_entry *se = &sit_i->sentries[start]; | 1638 | readed = ra_sit_pages(sbi, start_blk, nrpages); |
1492 | struct f2fs_sit_block *sit_blk; | 1639 | |
1493 | struct f2fs_sit_entry sit; | 1640 | start = start_blk * sit_i->sents_per_block; |
1494 | struct page *page; | 1641 | end = (start_blk + readed) * sit_i->sents_per_block; |
1495 | int i; | 1642 | |
1496 | 1643 | for (; start < end && start < TOTAL_SEGS(sbi); start++) { | |
1497 | mutex_lock(&curseg->curseg_mutex); | 1644 | struct seg_entry *se = &sit_i->sentries[start]; |
1498 | for (i = 0; i < sits_in_cursum(sum); i++) { | 1645 | struct f2fs_sit_block *sit_blk; |
1499 | if (le32_to_cpu(segno_in_journal(sum, i)) == start) { | 1646 | struct f2fs_sit_entry sit; |
1500 | sit = sit_in_journal(sum, i); | 1647 | struct page *page; |
1501 | mutex_unlock(&curseg->curseg_mutex); | 1648 | |
1502 | goto got_it; | 1649 | mutex_lock(&curseg->curseg_mutex); |
1650 | for (i = 0; i < sits_in_cursum(sum); i++) { | ||
1651 | if (le32_to_cpu(segno_in_journal(sum, i)) | ||
1652 | == start) { | ||
1653 | sit = sit_in_journal(sum, i); | ||
1654 | mutex_unlock(&curseg->curseg_mutex); | ||
1655 | goto got_it; | ||
1656 | } | ||
1503 | } | 1657 | } |
1504 | } | 1658 | mutex_unlock(&curseg->curseg_mutex); |
1505 | mutex_unlock(&curseg->curseg_mutex); | 1659 | |
1506 | page = get_current_sit_page(sbi, start); | 1660 | page = get_current_sit_page(sbi, start); |
1507 | sit_blk = (struct f2fs_sit_block *)page_address(page); | 1661 | sit_blk = (struct f2fs_sit_block *)page_address(page); |
1508 | sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; | 1662 | sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; |
1509 | f2fs_put_page(page, 1); | 1663 | f2fs_put_page(page, 1); |
1510 | got_it: | 1664 | got_it: |
1511 | check_block_count(sbi, start, &sit); | 1665 | check_block_count(sbi, start, &sit); |
1512 | seg_info_from_raw_sit(se, &sit); | 1666 | seg_info_from_raw_sit(se, &sit); |
1513 | if (sbi->segs_per_sec > 1) { | 1667 | if (sbi->segs_per_sec > 1) { |
1514 | struct sec_entry *e = get_sec_entry(sbi, start); | 1668 | struct sec_entry *e = get_sec_entry(sbi, start); |
1515 | e->valid_blocks += se->valid_blocks; | 1669 | e->valid_blocks += se->valid_blocks; |
1670 | } | ||
1516 | } | 1671 | } |
1517 | } | 1672 | start_blk += readed; |
1673 | } while (start_blk < sit_blk_cnt); | ||
1518 | } | 1674 | } |
1519 | 1675 | ||
1520 | static void init_free_segmap(struct f2fs_sb_info *sbi) | 1676 | static void init_free_segmap(struct f2fs_sb_info *sbi) |
@@ -1644,6 +1800,12 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1644 | sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); | 1800 | sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); |
1645 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); | 1801 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); |
1646 | sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; | 1802 | sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; |
1803 | sm_info->ipu_policy = F2FS_IPU_DISABLE; | ||
1804 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; | ||
1805 | |||
1806 | INIT_LIST_HEAD(&sm_info->discard_list); | ||
1807 | sm_info->nr_discards = 0; | ||
1808 | sm_info->max_discards = 0; | ||
1647 | 1809 | ||
1648 | err = build_sit_info(sbi); | 1810 | err = build_sit_info(sbi); |
1649 | if (err) | 1811 | if (err) |
@@ -1760,3 +1922,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) | |||
1760 | sbi->sm_info = NULL; | 1922 | sbi->sm_info = NULL; |
1761 | kfree(sm_info); | 1923 | kfree(sm_info); |
1762 | } | 1924 | } |
1925 | |||
1926 | int __init create_segment_manager_caches(void) | ||
1927 | { | ||
1928 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", | ||
1929 | sizeof(struct discard_entry), NULL); | ||
1930 | if (!discard_entry_slab) | ||
1931 | return -ENOMEM; | ||
1932 | return 0; | ||
1933 | } | ||
1934 | |||
1935 | void destroy_segment_manager_caches(void) | ||
1936 | { | ||
1937 | kmem_cache_destroy(discard_entry_slab); | ||
1938 | } | ||
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 269f690b4e24..5731682d7516 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -20,13 +20,8 @@ | |||
20 | #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) | 20 | #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) |
21 | #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) | 21 | #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) |
22 | 22 | ||
23 | #define IS_DATASEG(t) \ | 23 | #define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) |
24 | ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ | 24 | #define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) |
25 | (t == CURSEG_WARM_DATA)) | ||
26 | |||
27 | #define IS_NODESEG(t) \ | ||
28 | ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ | ||
29 | (t == CURSEG_WARM_NODE)) | ||
30 | 25 | ||
31 | #define IS_CURSEG(sbi, seg) \ | 26 | #define IS_CURSEG(sbi, seg) \ |
32 | ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ | 27 | ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ |
@@ -83,25 +78,20 @@ | |||
83 | (segno / SIT_ENTRY_PER_BLOCK) | 78 | (segno / SIT_ENTRY_PER_BLOCK) |
84 | #define START_SEGNO(sit_i, segno) \ | 79 | #define START_SEGNO(sit_i, segno) \ |
85 | (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) | 80 | (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) |
81 | #define SIT_BLK_CNT(sbi) \ | ||
82 | ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) | ||
86 | #define f2fs_bitmap_size(nr) \ | 83 | #define f2fs_bitmap_size(nr) \ |
87 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) | 84 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) |
88 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) | 85 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) |
89 | #define TOTAL_SECS(sbi) (sbi->total_sections) | 86 | #define TOTAL_SECS(sbi) (sbi->total_sections) |
90 | 87 | ||
91 | #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ | 88 | #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ |
92 | (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) | 89 | (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) |
93 | #define SECTOR_TO_BLOCK(sbi, sectors) \ | 90 | #define SECTOR_TO_BLOCK(sbi, sectors) \ |
94 | (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) | 91 | (sectors >> (sbi)->log_sectors_per_block) |
95 | #define MAX_BIO_BLOCKS(max_hw_blocks) \ | 92 | #define MAX_BIO_BLOCKS(max_hw_blocks) \ |
96 | (min((int)max_hw_blocks, BIO_MAX_PAGES)) | 93 | (min((int)max_hw_blocks, BIO_MAX_PAGES)) |
97 | 94 | ||
98 | /* during checkpoint, bio_private is used to synchronize the last bio */ | ||
99 | struct bio_private { | ||
100 | struct f2fs_sb_info *sbi; | ||
101 | bool is_sync; | ||
102 | void *wait; | ||
103 | }; | ||
104 | |||
105 | /* | 95 | /* |
106 | * indicate a block allocation direction: RIGHT and LEFT. | 96 | * indicate a block allocation direction: RIGHT and LEFT. |
107 | * RIGHT means allocating new sections towards the end of volume. | 97 | * RIGHT means allocating new sections towards the end of volume. |
@@ -458,8 +448,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) | |||
458 | 448 | ||
459 | static inline bool need_SSR(struct f2fs_sb_info *sbi) | 449 | static inline bool need_SSR(struct f2fs_sb_info *sbi) |
460 | { | 450 | { |
461 | return ((prefree_segments(sbi) / sbi->segs_per_sec) | 451 | return (prefree_segments(sbi) / sbi->segs_per_sec) |
462 | + free_sections(sbi) < overprovision_sections(sbi)); | 452 | + free_sections(sbi) < overprovision_sections(sbi); |
463 | } | 453 | } |
464 | 454 | ||
465 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | 455 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) |
@@ -467,38 +457,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | |||
467 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); | 457 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); |
468 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); | 458 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); |
469 | 459 | ||
470 | if (sbi->por_doing) | 460 | if (unlikely(sbi->por_doing)) |
471 | return false; | 461 | return false; |
472 | 462 | ||
473 | return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + | 463 | return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + |
474 | reserved_sections(sbi))); | 464 | reserved_sections(sbi)); |
475 | } | 465 | } |
476 | 466 | ||
477 | static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) | 467 | static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) |
478 | { | 468 | { |
479 | return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); | 469 | return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; |
480 | } | 470 | } |
481 | 471 | ||
482 | static inline int utilization(struct f2fs_sb_info *sbi) | 472 | static inline int utilization(struct f2fs_sb_info *sbi) |
483 | { | 473 | { |
484 | return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); | 474 | return div_u64((u64)valid_user_blocks(sbi) * 100, |
475 | sbi->user_block_count); | ||
485 | } | 476 | } |
486 | 477 | ||
487 | /* | 478 | /* |
488 | * Sometimes f2fs may be better to drop out-of-place update policy. | 479 | * Sometimes f2fs may be better to drop out-of-place update policy. |
489 | * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write | 480 | * And, users can control the policy through sysfs entries. |
490 | * data in the original place likewise other traditional file systems. | 481 | * There are five policies with triggering conditions as follows. |
491 | * But, currently set 100 in percentage, which means it is disabled. | 482 | * F2FS_IPU_FORCE - all the time, |
492 | * See below need_inplace_update(). | 483 | * F2FS_IPU_SSR - if SSR mode is activated, |
484 | * F2FS_IPU_UTIL - if FS utilization is over threashold, | ||
485 | * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over | ||
486 | * threashold, | ||
487 | * F2FS_IPUT_DISABLE - disable IPU. (=default option) | ||
493 | */ | 488 | */ |
494 | #define MIN_IPU_UTIL 100 | 489 | #define DEF_MIN_IPU_UTIL 70 |
490 | |||
491 | enum { | ||
492 | F2FS_IPU_FORCE, | ||
493 | F2FS_IPU_SSR, | ||
494 | F2FS_IPU_UTIL, | ||
495 | F2FS_IPU_SSR_UTIL, | ||
496 | F2FS_IPU_DISABLE, | ||
497 | }; | ||
498 | |||
495 | static inline bool need_inplace_update(struct inode *inode) | 499 | static inline bool need_inplace_update(struct inode *inode) |
496 | { | 500 | { |
497 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 501 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
502 | |||
503 | /* IPU can be done only for the user data */ | ||
498 | if (S_ISDIR(inode->i_mode)) | 504 | if (S_ISDIR(inode->i_mode)) |
499 | return false; | 505 | return false; |
500 | if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) | 506 | |
507 | switch (SM_I(sbi)->ipu_policy) { | ||
508 | case F2FS_IPU_FORCE: | ||
501 | return true; | 509 | return true; |
510 | case F2FS_IPU_SSR: | ||
511 | if (need_SSR(sbi)) | ||
512 | return true; | ||
513 | break; | ||
514 | case F2FS_IPU_UTIL: | ||
515 | if (utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
516 | return true; | ||
517 | break; | ||
518 | case F2FS_IPU_SSR_UTIL: | ||
519 | if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
520 | return true; | ||
521 | break; | ||
522 | case F2FS_IPU_DISABLE: | ||
523 | break; | ||
524 | } | ||
502 | return false; | 525 | return false; |
503 | } | 526 | } |
504 | 527 | ||
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bafff72de8e8..1a85f83abd53 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -50,6 +50,7 @@ enum { | |||
50 | Opt_active_logs, | 50 | Opt_active_logs, |
51 | Opt_disable_ext_identify, | 51 | Opt_disable_ext_identify, |
52 | Opt_inline_xattr, | 52 | Opt_inline_xattr, |
53 | Opt_inline_data, | ||
53 | Opt_err, | 54 | Opt_err, |
54 | }; | 55 | }; |
55 | 56 | ||
@@ -65,6 +66,7 @@ static match_table_t f2fs_tokens = { | |||
65 | {Opt_active_logs, "active_logs=%u"}, | 66 | {Opt_active_logs, "active_logs=%u"}, |
66 | {Opt_disable_ext_identify, "disable_ext_identify"}, | 67 | {Opt_disable_ext_identify, "disable_ext_identify"}, |
67 | {Opt_inline_xattr, "inline_xattr"}, | 68 | {Opt_inline_xattr, "inline_xattr"}, |
69 | {Opt_inline_data, "inline_data"}, | ||
68 | {Opt_err, NULL}, | 70 | {Opt_err, NULL}, |
69 | }; | 71 | }; |
70 | 72 | ||
@@ -72,6 +74,7 @@ static match_table_t f2fs_tokens = { | |||
72 | enum { | 74 | enum { |
73 | GC_THREAD, /* struct f2fs_gc_thread */ | 75 | GC_THREAD, /* struct f2fs_gc_thread */ |
74 | SM_INFO, /* struct f2fs_sm_info */ | 76 | SM_INFO, /* struct f2fs_sm_info */ |
77 | F2FS_SBI, /* struct f2fs_sb_info */ | ||
75 | }; | 78 | }; |
76 | 79 | ||
77 | struct f2fs_attr { | 80 | struct f2fs_attr { |
@@ -89,6 +92,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) | |||
89 | return (unsigned char *)sbi->gc_thread; | 92 | return (unsigned char *)sbi->gc_thread; |
90 | else if (struct_type == SM_INFO) | 93 | else if (struct_type == SM_INFO) |
91 | return (unsigned char *)SM_I(sbi); | 94 | return (unsigned char *)SM_I(sbi); |
95 | else if (struct_type == F2FS_SBI) | ||
96 | return (unsigned char *)sbi; | ||
92 | return NULL; | 97 | return NULL; |
93 | } | 98 | } |
94 | 99 | ||
@@ -175,6 +180,10 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); | |||
175 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); | 180 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); |
176 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); | 181 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); |
177 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); | 182 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); |
183 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); | ||
184 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); | ||
185 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); | ||
186 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); | ||
178 | 187 | ||
179 | #define ATTR_LIST(name) (&f2fs_attr_##name.attr) | 188 | #define ATTR_LIST(name) (&f2fs_attr_##name.attr) |
180 | static struct attribute *f2fs_attrs[] = { | 189 | static struct attribute *f2fs_attrs[] = { |
@@ -183,6 +192,10 @@ static struct attribute *f2fs_attrs[] = { | |||
183 | ATTR_LIST(gc_no_gc_sleep_time), | 192 | ATTR_LIST(gc_no_gc_sleep_time), |
184 | ATTR_LIST(gc_idle), | 193 | ATTR_LIST(gc_idle), |
185 | ATTR_LIST(reclaim_segments), | 194 | ATTR_LIST(reclaim_segments), |
195 | ATTR_LIST(max_small_discards), | ||
196 | ATTR_LIST(ipu_policy), | ||
197 | ATTR_LIST(min_ipu_util), | ||
198 | ATTR_LIST(max_victim_search), | ||
186 | NULL, | 199 | NULL, |
187 | }; | 200 | }; |
188 | 201 | ||
@@ -311,6 +324,9 @@ static int parse_options(struct super_block *sb, char *options) | |||
311 | case Opt_disable_ext_identify: | 324 | case Opt_disable_ext_identify: |
312 | set_opt(sbi, DISABLE_EXT_IDENTIFY); | 325 | set_opt(sbi, DISABLE_EXT_IDENTIFY); |
313 | break; | 326 | break; |
327 | case Opt_inline_data: | ||
328 | set_opt(sbi, INLINE_DATA); | ||
329 | break; | ||
314 | default: | 330 | default: |
315 | f2fs_msg(sb, KERN_ERR, | 331 | f2fs_msg(sb, KERN_ERR, |
316 | "Unrecognized mount option \"%s\" or missing value", | 332 | "Unrecognized mount option \"%s\" or missing value", |
@@ -325,7 +341,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
325 | { | 341 | { |
326 | struct f2fs_inode_info *fi; | 342 | struct f2fs_inode_info *fi; |
327 | 343 | ||
328 | fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); | 344 | fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO); |
329 | if (!fi) | 345 | if (!fi) |
330 | return NULL; | 346 | return NULL; |
331 | 347 | ||
@@ -508,7 +524,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
508 | #endif | 524 | #endif |
509 | if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) | 525 | if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) |
510 | seq_puts(seq, ",disable_ext_identify"); | 526 | seq_puts(seq, ",disable_ext_identify"); |
511 | 527 | if (test_opt(sbi, INLINE_DATA)) | |
528 | seq_puts(seq, ",inline_data"); | ||
512 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); | 529 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); |
513 | 530 | ||
514 | return 0; | 531 | return 0; |
@@ -518,7 +535,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) | |||
518 | { | 535 | { |
519 | struct super_block *sb = seq->private; | 536 | struct super_block *sb = seq->private; |
520 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | 537 | struct f2fs_sb_info *sbi = F2FS_SB(sb); |
521 | unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); | 538 | unsigned int total_segs = |
539 | le32_to_cpu(sbi->raw_super->segment_count_main); | ||
522 | int i; | 540 | int i; |
523 | 541 | ||
524 | for (i = 0; i < total_segs; i++) { | 542 | for (i = 0; i < total_segs; i++) { |
@@ -618,7 +636,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | |||
618 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | 636 | struct f2fs_sb_info *sbi = F2FS_SB(sb); |
619 | struct inode *inode; | 637 | struct inode *inode; |
620 | 638 | ||
621 | if (ino < F2FS_ROOT_INO(sbi)) | 639 | if (unlikely(ino < F2FS_ROOT_INO(sbi))) |
622 | return ERR_PTR(-ESTALE); | 640 | return ERR_PTR(-ESTALE); |
623 | 641 | ||
624 | /* | 642 | /* |
@@ -629,7 +647,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | |||
629 | inode = f2fs_iget(sb, ino); | 647 | inode = f2fs_iget(sb, ino); |
630 | if (IS_ERR(inode)) | 648 | if (IS_ERR(inode)) |
631 | return ERR_CAST(inode); | 649 | return ERR_CAST(inode); |
632 | if (generation && inode->i_generation != generation) { | 650 | if (unlikely(generation && inode->i_generation != generation)) { |
633 | /* we didn't find the right inode.. */ | 651 | /* we didn't find the right inode.. */ |
634 | iput(inode); | 652 | iput(inode); |
635 | return ERR_PTR(-ESTALE); | 653 | return ERR_PTR(-ESTALE); |
@@ -732,10 +750,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) | |||
732 | fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); | 750 | fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); |
733 | fsmeta += le32_to_cpu(raw_super->segment_count_ssa); | 751 | fsmeta += le32_to_cpu(raw_super->segment_count_ssa); |
734 | 752 | ||
735 | if (fsmeta >= total) | 753 | if (unlikely(fsmeta >= total)) |
736 | return 1; | 754 | return 1; |
737 | 755 | ||
738 | if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { | 756 | if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { |
739 | f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); | 757 | f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); |
740 | return 1; | 758 | return 1; |
741 | } | 759 | } |
@@ -763,6 +781,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
763 | sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); | 781 | sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); |
764 | sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); | 782 | sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); |
765 | sbi->cur_victim_sec = NULL_SECNO; | 783 | sbi->cur_victim_sec = NULL_SECNO; |
784 | sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; | ||
766 | 785 | ||
767 | for (i = 0; i < NR_COUNT_TYPE; i++) | 786 | for (i = 0; i < NR_COUNT_TYPE; i++) |
768 | atomic_set(&sbi->nr_pages[i], 0); | 787 | atomic_set(&sbi->nr_pages[i], 0); |
@@ -798,9 +817,10 @@ retry: | |||
798 | /* sanity checking of raw super */ | 817 | /* sanity checking of raw super */ |
799 | if (sanity_check_raw_super(sb, *raw_super)) { | 818 | if (sanity_check_raw_super(sb, *raw_super)) { |
800 | brelse(*raw_super_buf); | 819 | brelse(*raw_super_buf); |
801 | f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " | 820 | f2fs_msg(sb, KERN_ERR, |
802 | "in %dth superblock", block + 1); | 821 | "Can't find valid F2FS filesystem in %dth superblock", |
803 | if(block == 0) { | 822 | block + 1); |
823 | if (block == 0) { | ||
804 | block++; | 824 | block++; |
805 | goto retry; | 825 | goto retry; |
806 | } else { | 826 | } else { |
@@ -818,6 +838,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
818 | struct buffer_head *raw_super_buf; | 838 | struct buffer_head *raw_super_buf; |
819 | struct inode *root; | 839 | struct inode *root; |
820 | long err = -EINVAL; | 840 | long err = -EINVAL; |
841 | int i; | ||
821 | 842 | ||
822 | /* allocate memory for f2fs-specific super block info */ | 843 | /* allocate memory for f2fs-specific super block info */ |
823 | sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); | 844 | sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); |
@@ -825,7 +846,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
825 | return -ENOMEM; | 846 | return -ENOMEM; |
826 | 847 | ||
827 | /* set a block size */ | 848 | /* set a block size */ |
828 | if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { | 849 | if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { |
829 | f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); | 850 | f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); |
830 | goto free_sbi; | 851 | goto free_sbi; |
831 | } | 852 | } |
@@ -874,7 +895,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
874 | mutex_init(&sbi->node_write); | 895 | mutex_init(&sbi->node_write); |
875 | sbi->por_doing = false; | 896 | sbi->por_doing = false; |
876 | spin_lock_init(&sbi->stat_lock); | 897 | spin_lock_init(&sbi->stat_lock); |
877 | init_rwsem(&sbi->bio_sem); | 898 | |
899 | mutex_init(&sbi->read_io.io_mutex); | ||
900 | sbi->read_io.sbi = sbi; | ||
901 | sbi->read_io.bio = NULL; | ||
902 | for (i = 0; i < NR_PAGE_TYPE; i++) { | ||
903 | mutex_init(&sbi->write_io[i].io_mutex); | ||
904 | sbi->write_io[i].sbi = sbi; | ||
905 | sbi->write_io[i].bio = NULL; | ||
906 | } | ||
907 | |||
878 | init_rwsem(&sbi->cp_rwsem); | 908 | init_rwsem(&sbi->cp_rwsem); |
879 | init_waitqueue_head(&sbi->cp_wait); | 909 | init_waitqueue_head(&sbi->cp_wait); |
880 | init_sb_info(sbi); | 910 | init_sb_info(sbi); |
@@ -939,9 +969,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
939 | } | 969 | } |
940 | 970 | ||
941 | /* if there are nt orphan nodes free them */ | 971 | /* if there are nt orphan nodes free them */ |
942 | err = -EINVAL; | 972 | recover_orphan_inodes(sbi); |
943 | if (recover_orphan_inodes(sbi)) | ||
944 | goto free_node_inode; | ||
945 | 973 | ||
946 | /* read root inode and dentry */ | 974 | /* read root inode and dentry */ |
947 | root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); | 975 | root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); |
@@ -950,8 +978,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
950 | err = PTR_ERR(root); | 978 | err = PTR_ERR(root); |
951 | goto free_node_inode; | 979 | goto free_node_inode; |
952 | } | 980 | } |
953 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) | 981 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
982 | err = -EINVAL; | ||
954 | goto free_root_inode; | 983 | goto free_root_inode; |
984 | } | ||
955 | 985 | ||
956 | sb->s_root = d_make_root(root); /* allocate root dentry */ | 986 | sb->s_root = d_make_root(root); /* allocate root dentry */ |
957 | if (!sb->s_root) { | 987 | if (!sb->s_root) { |
@@ -1053,7 +1083,7 @@ static int __init init_inodecache(void) | |||
1053 | { | 1083 | { |
1054 | f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", | 1084 | f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", |
1055 | sizeof(struct f2fs_inode_info), NULL); | 1085 | sizeof(struct f2fs_inode_info), NULL); |
1056 | if (f2fs_inode_cachep == NULL) | 1086 | if (!f2fs_inode_cachep) |
1057 | return -ENOMEM; | 1087 | return -ENOMEM; |
1058 | return 0; | 1088 | return 0; |
1059 | } | 1089 | } |
@@ -1078,9 +1108,12 @@ static int __init init_f2fs_fs(void) | |||
1078 | err = create_node_manager_caches(); | 1108 | err = create_node_manager_caches(); |
1079 | if (err) | 1109 | if (err) |
1080 | goto free_inodecache; | 1110 | goto free_inodecache; |
1081 | err = create_gc_caches(); | 1111 | err = create_segment_manager_caches(); |
1082 | if (err) | 1112 | if (err) |
1083 | goto free_node_manager_caches; | 1113 | goto free_node_manager_caches; |
1114 | err = create_gc_caches(); | ||
1115 | if (err) | ||
1116 | goto free_segment_manager_caches; | ||
1084 | err = create_checkpoint_caches(); | 1117 | err = create_checkpoint_caches(); |
1085 | if (err) | 1118 | if (err) |
1086 | goto free_gc_caches; | 1119 | goto free_gc_caches; |
@@ -1102,6 +1135,8 @@ free_checkpoint_caches: | |||
1102 | destroy_checkpoint_caches(); | 1135 | destroy_checkpoint_caches(); |
1103 | free_gc_caches: | 1136 | free_gc_caches: |
1104 | destroy_gc_caches(); | 1137 | destroy_gc_caches(); |
1138 | free_segment_manager_caches: | ||
1139 | destroy_segment_manager_caches(); | ||
1105 | free_node_manager_caches: | 1140 | free_node_manager_caches: |
1106 | destroy_node_manager_caches(); | 1141 | destroy_node_manager_caches(); |
1107 | free_inodecache: | 1142 | free_inodecache: |
@@ -1117,6 +1152,7 @@ static void __exit exit_f2fs_fs(void) | |||
1117 | unregister_filesystem(&f2fs_fs_type); | 1152 | unregister_filesystem(&f2fs_fs_type); |
1118 | destroy_checkpoint_caches(); | 1153 | destroy_checkpoint_caches(); |
1119 | destroy_gc_caches(); | 1154 | destroy_gc_caches(); |
1155 | destroy_segment_manager_caches(); | ||
1120 | destroy_node_manager_caches(); | 1156 | destroy_node_manager_caches(); |
1121 | destroy_inodecache(); | 1157 | destroy_inodecache(); |
1122 | kset_unregister(f2fs_kset); | 1158 | kset_unregister(f2fs_kset); |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index aa7a3f139fe5..b0fb8a27f3da 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -522,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, | |||
522 | if (found) | 522 | if (found) |
523 | free = free + ENTRY_SIZE(here); | 523 | free = free + ENTRY_SIZE(here); |
524 | 524 | ||
525 | if (free < newsize) { | 525 | if (unlikely(free < newsize)) { |
526 | error = -ENOSPC; | 526 | error = -ENOSPC; |
527 | goto exit; | 527 | goto exit; |
528 | } | 528 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1f4a10ece2f1..e0259a163f98 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -516,13 +516,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
516 | } | 516 | } |
517 | WARN_ON(inode->i_state & I_SYNC); | 517 | WARN_ON(inode->i_state & I_SYNC); |
518 | /* | 518 | /* |
519 | * Skip inode if it is clean. We don't want to mess with writeback | 519 | * Skip inode if it is clean and we have no outstanding writeback in |
520 | * lists in this function since flusher thread may be doing for example | 520 | * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this |
521 | * sync in parallel and if we move the inode, it could get skipped. So | 521 | * function since flusher thread may be doing for example sync in |
522 | * here we make sure inode is on some writeback list and leave it there | 522 | * parallel and if we move the inode, it could get skipped. So here we |
523 | * unless we have completely cleaned the inode. | 523 | * make sure inode is on some writeback list and leave it there unless |
524 | * we have completely cleaned the inode. | ||
524 | */ | 525 | */ |
525 | if (!(inode->i_state & I_DIRTY)) | 526 | if (!(inode->i_state & I_DIRTY) && |
527 | (wbc->sync_mode != WB_SYNC_ALL || | ||
528 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) | ||
526 | goto out; | 529 | goto out; |
527 | inode->i_state |= I_SYNC; | 530 | inode->i_state |= I_SYNC; |
528 | spin_unlock(&inode->i_lock); | 531 | spin_unlock(&inode->i_lock); |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ef74ad5fd362..0a648bb455ae 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -1296,22 +1296,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |||
1296 | return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); | 1296 | return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe, | ||
1300 | struct pipe_buffer *buf) | ||
1301 | { | ||
1302 | return 1; | ||
1303 | } | ||
1304 | |||
1305 | static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = { | ||
1306 | .can_merge = 0, | ||
1307 | .map = generic_pipe_buf_map, | ||
1308 | .unmap = generic_pipe_buf_unmap, | ||
1309 | .confirm = generic_pipe_buf_confirm, | ||
1310 | .release = generic_pipe_buf_release, | ||
1311 | .steal = fuse_dev_pipe_buf_steal, | ||
1312 | .get = generic_pipe_buf_get, | ||
1313 | }; | ||
1314 | |||
1315 | static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | 1299 | static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, |
1316 | struct pipe_inode_info *pipe, | 1300 | struct pipe_inode_info *pipe, |
1317 | size_t len, unsigned int flags) | 1301 | size_t len, unsigned int flags) |
@@ -1358,7 +1342,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | |||
1358 | buf->page = bufs[page_nr].page; | 1342 | buf->page = bufs[page_nr].page; |
1359 | buf->offset = bufs[page_nr].offset; | 1343 | buf->offset = bufs[page_nr].offset; |
1360 | buf->len = bufs[page_nr].len; | 1344 | buf->len = bufs[page_nr].len; |
1361 | buf->ops = &fuse_dev_pipe_buf_ops; | 1345 | /* |
1346 | * Need to be careful about this. Having buf->ops in module | ||
1347 | * code can Oops if the buffer persists after module unload. | ||
1348 | */ | ||
1349 | buf->ops = &nosteal_pipe_buf_ops; | ||
1362 | 1350 | ||
1363 | pipe->nrbufs++; | 1351 | pipe->nrbufs++; |
1364 | page_nr++; | 1352 | page_nr++; |
@@ -1599,7 +1587,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, | |||
1599 | 1587 | ||
1600 | this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); | 1588 | this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); |
1601 | err = fuse_copy_page(cs, &page, offset, this_num, 0); | 1589 | err = fuse_copy_page(cs, &page, offset, this_num, 0); |
1602 | if (!err && offset == 0 && (num != 0 || file_size == end)) | 1590 | if (!err && offset == 0 && |
1591 | (this_num == PAGE_CACHE_SIZE || file_size == end)) | ||
1603 | SetPageUptodate(page); | 1592 | SetPageUptodate(page); |
1604 | unlock_page(page); | 1593 | unlock_page(page); |
1605 | page_cache_release(page); | 1594 | page_cache_release(page); |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c3eb2c46c8f1..1d1292c581c3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -112,6 +112,16 @@ void fuse_invalidate_attr(struct inode *inode) | |||
112 | get_fuse_inode(inode)->i_time = 0; | 112 | get_fuse_inode(inode)->i_time = 0; |
113 | } | 113 | } |
114 | 114 | ||
115 | /** | ||
116 | * Mark the attributes as stale due to an atime change. Avoid the invalidate if | ||
117 | * atime is not used. | ||
118 | */ | ||
119 | void fuse_invalidate_atime(struct inode *inode) | ||
120 | { | ||
121 | if (!IS_RDONLY(inode)) | ||
122 | fuse_invalidate_attr(inode); | ||
123 | } | ||
124 | |||
115 | /* | 125 | /* |
116 | * Just mark the entry as stale, so that a next attempt to look it up | 126 | * Just mark the entry as stale, so that a next attempt to look it up |
117 | * will result in a new lookup call to userspace | 127 | * will result in a new lookup call to userspace |
@@ -1371,7 +1381,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) | |||
1371 | } | 1381 | } |
1372 | 1382 | ||
1373 | __free_page(page); | 1383 | __free_page(page); |
1374 | fuse_invalidate_attr(inode); /* atime changed */ | 1384 | fuse_invalidate_atime(inode); |
1375 | return err; | 1385 | return err; |
1376 | } | 1386 | } |
1377 | 1387 | ||
@@ -1404,7 +1414,7 @@ static char *read_link(struct dentry *dentry) | |||
1404 | link[req->out.args[0].size] = '\0'; | 1414 | link[req->out.args[0].size] = '\0'; |
1405 | out: | 1415 | out: |
1406 | fuse_put_request(fc, req); | 1416 | fuse_put_request(fc, req); |
1407 | fuse_invalidate_attr(inode); /* atime changed */ | 1417 | fuse_invalidate_atime(inode); |
1408 | return link; | 1418 | return link; |
1409 | } | 1419 | } |
1410 | 1420 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7e70506297bc..74f6ca500504 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -127,7 +127,15 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) | |||
127 | if (atomic_dec_and_test(&ff->count)) { | 127 | if (atomic_dec_and_test(&ff->count)) { |
128 | struct fuse_req *req = ff->reserved_req; | 128 | struct fuse_req *req = ff->reserved_req; |
129 | 129 | ||
130 | if (sync) { | 130 | if (ff->fc->no_open) { |
131 | /* | ||
132 | * Drop the release request when client does not | ||
133 | * implement 'open' | ||
134 | */ | ||
135 | req->background = 0; | ||
136 | path_put(&req->misc.release.path); | ||
137 | fuse_put_request(ff->fc, req); | ||
138 | } else if (sync) { | ||
131 | req->background = 0; | 139 | req->background = 0; |
132 | fuse_request_send(ff->fc, req); | 140 | fuse_request_send(ff->fc, req); |
133 | path_put(&req->misc.release.path); | 141 | path_put(&req->misc.release.path); |
@@ -144,27 +152,36 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) | |||
144 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 152 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
145 | bool isdir) | 153 | bool isdir) |
146 | { | 154 | { |
147 | struct fuse_open_out outarg; | ||
148 | struct fuse_file *ff; | 155 | struct fuse_file *ff; |
149 | int err; | ||
150 | int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; | 156 | int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; |
151 | 157 | ||
152 | ff = fuse_file_alloc(fc); | 158 | ff = fuse_file_alloc(fc); |
153 | if (!ff) | 159 | if (!ff) |
154 | return -ENOMEM; | 160 | return -ENOMEM; |
155 | 161 | ||
156 | err = fuse_send_open(fc, nodeid, file, opcode, &outarg); | 162 | ff->fh = 0; |
157 | if (err) { | 163 | ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */ |
158 | fuse_file_free(ff); | 164 | if (!fc->no_open || isdir) { |
159 | return err; | 165 | struct fuse_open_out outarg; |
166 | int err; | ||
167 | |||
168 | err = fuse_send_open(fc, nodeid, file, opcode, &outarg); | ||
169 | if (!err) { | ||
170 | ff->fh = outarg.fh; | ||
171 | ff->open_flags = outarg.open_flags; | ||
172 | |||
173 | } else if (err != -ENOSYS || isdir) { | ||
174 | fuse_file_free(ff); | ||
175 | return err; | ||
176 | } else { | ||
177 | fc->no_open = 1; | ||
178 | } | ||
160 | } | 179 | } |
161 | 180 | ||
162 | if (isdir) | 181 | if (isdir) |
163 | outarg.open_flags &= ~FOPEN_DIRECT_IO; | 182 | ff->open_flags &= ~FOPEN_DIRECT_IO; |
164 | 183 | ||
165 | ff->fh = outarg.fh; | ||
166 | ff->nodeid = nodeid; | 184 | ff->nodeid = nodeid; |
167 | ff->open_flags = outarg.open_flags; | ||
168 | file->private_data = fuse_file_get(ff); | 185 | file->private_data = fuse_file_get(ff); |
169 | 186 | ||
170 | return 0; | 187 | return 0; |
@@ -687,7 +704,7 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
687 | SetPageUptodate(page); | 704 | SetPageUptodate(page); |
688 | } | 705 | } |
689 | 706 | ||
690 | fuse_invalidate_attr(inode); /* atime changed */ | 707 | fuse_invalidate_atime(inode); |
691 | out: | 708 | out: |
692 | unlock_page(page); | 709 | unlock_page(page); |
693 | return err; | 710 | return err; |
@@ -716,7 +733,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
716 | fuse_read_update_size(inode, pos, | 733 | fuse_read_update_size(inode, pos, |
717 | req->misc.read.attr_ver); | 734 | req->misc.read.attr_ver); |
718 | } | 735 | } |
719 | fuse_invalidate_attr(inode); /* atime changed */ | 736 | fuse_invalidate_atime(inode); |
720 | } | 737 | } |
721 | 738 | ||
722 | for (i = 0; i < req->num_pages; i++) { | 739 | for (i = 0; i < req->num_pages; i++) { |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7d2730912667..2da5db2c8bdb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -485,6 +485,9 @@ struct fuse_conn { | |||
485 | * and hence races in setting them will not cause malfunction | 485 | * and hence races in setting them will not cause malfunction |
486 | */ | 486 | */ |
487 | 487 | ||
488 | /** Is open/release not implemented by fs? */ | ||
489 | unsigned no_open:1; | ||
490 | |||
488 | /** Is fsync not implemented by fs? */ | 491 | /** Is fsync not implemented by fs? */ |
489 | unsigned no_fsync:1; | 492 | unsigned no_fsync:1; |
490 | 493 | ||
@@ -788,6 +791,8 @@ void fuse_invalidate_attr(struct inode *inode); | |||
788 | 791 | ||
789 | void fuse_invalidate_entry_cache(struct dentry *entry); | 792 | void fuse_invalidate_entry_cache(struct dentry *entry); |
790 | 793 | ||
794 | void fuse_invalidate_atime(struct inode *inode); | ||
795 | |||
791 | /** | 796 | /** |
792 | * Acquire reference to fuse_conn | 797 | * Acquire reference to fuse_conn |
793 | */ | 798 | */ |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index b7fc035a6943..49436fa7cd4f 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
986 | { | 986 | { |
987 | struct file *file = iocb->ki_filp; | 987 | struct file *file = iocb->ki_filp; |
988 | struct inode *inode = file->f_mapping->host; | 988 | struct inode *inode = file->f_mapping->host; |
989 | struct address_space *mapping = inode->i_mapping; | ||
989 | struct gfs2_inode *ip = GFS2_I(inode); | 990 | struct gfs2_inode *ip = GFS2_I(inode); |
990 | struct gfs2_holder gh; | 991 | struct gfs2_holder gh; |
991 | int rv; | 992 | int rv; |
@@ -1006,6 +1007,36 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1006 | if (rv != 1) | 1007 | if (rv != 1) |
1007 | goto out; /* dio not valid, fall back to buffered i/o */ | 1008 | goto out; /* dio not valid, fall back to buffered i/o */ |
1008 | 1009 | ||
1010 | /* | ||
1011 | * Now since we are holding a deferred (CW) lock at this point, you | ||
1012 | * might be wondering why this is ever needed. There is a case however | ||
1013 | * where we've granted a deferred local lock against a cached exclusive | ||
1014 | * glock. That is ok provided all granted local locks are deferred, but | ||
1015 | * it also means that it is possible to encounter pages which are | ||
1016 | * cached and possibly also mapped. So here we check for that and sort | ||
1017 | * them out ahead of the dio. The glock state machine will take care of | ||
1018 | * everything else. | ||
1019 | * | ||
1020 | * If in fact the cached glock state (gl->gl_state) is deferred (CW) in | ||
1021 | * the first place, mapping->nr_pages will always be zero. | ||
1022 | */ | ||
1023 | if (mapping->nrpages) { | ||
1024 | loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); | ||
1025 | loff_t len = iov_length(iov, nr_segs); | ||
1026 | loff_t end = PAGE_ALIGN(offset + len) - 1; | ||
1027 | |||
1028 | rv = 0; | ||
1029 | if (len == 0) | ||
1030 | goto out; | ||
1031 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | ||
1032 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); | ||
1033 | rv = filemap_write_and_wait_range(mapping, lstart, end); | ||
1034 | if (rv) | ||
1035 | goto out; | ||
1036 | if (rw == WRITE) | ||
1037 | truncate_inode_pages_range(mapping, lstart, end); | ||
1038 | } | ||
1039 | |||
1009 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1040 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
1010 | offset, nr_segs, gfs2_get_block_direct, | 1041 | offset, nr_segs, gfs2_get_block_direct, |
1011 | NULL, NULL, 0); | 1042 | NULL, NULL, 0); |
@@ -1050,30 +1081,22 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) | |||
1050 | bh = bh->b_this_page; | 1081 | bh = bh->b_this_page; |
1051 | } while(bh != head); | 1082 | } while(bh != head); |
1052 | spin_unlock(&sdp->sd_ail_lock); | 1083 | spin_unlock(&sdp->sd_ail_lock); |
1053 | gfs2_log_unlock(sdp); | ||
1054 | 1084 | ||
1055 | head = bh = page_buffers(page); | 1085 | head = bh = page_buffers(page); |
1056 | do { | 1086 | do { |
1057 | gfs2_log_lock(sdp); | ||
1058 | bd = bh->b_private; | 1087 | bd = bh->b_private; |
1059 | if (bd) { | 1088 | if (bd) { |
1060 | gfs2_assert_warn(sdp, bd->bd_bh == bh); | 1089 | gfs2_assert_warn(sdp, bd->bd_bh == bh); |
1061 | if (!list_empty(&bd->bd_list)) { | 1090 | if (!list_empty(&bd->bd_list)) |
1062 | if (!buffer_pinned(bh)) | 1091 | list_del_init(&bd->bd_list); |
1063 | list_del_init(&bd->bd_list); | 1092 | bd->bd_bh = NULL; |
1064 | else | ||
1065 | bd = NULL; | ||
1066 | } | ||
1067 | if (bd) | ||
1068 | bd->bd_bh = NULL; | ||
1069 | bh->b_private = NULL; | 1093 | bh->b_private = NULL; |
1070 | } | ||
1071 | gfs2_log_unlock(sdp); | ||
1072 | if (bd) | ||
1073 | kmem_cache_free(gfs2_bufdata_cachep, bd); | 1094 | kmem_cache_free(gfs2_bufdata_cachep, bd); |
1095 | } | ||
1074 | 1096 | ||
1075 | bh = bh->b_this_page; | 1097 | bh = bh->b_this_page; |
1076 | } while (bh != head); | 1098 | } while (bh != head); |
1099 | gfs2_log_unlock(sdp); | ||
1077 | 1100 | ||
1078 | return try_to_free_buffers(page); | 1101 | return try_to_free_buffers(page); |
1079 | 1102 | ||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 2e5fc268d324..fa32655449c8 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -834,6 +834,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, | |||
834 | struct gfs2_leaf *leaf; | 834 | struct gfs2_leaf *leaf; |
835 | struct gfs2_dirent *dent; | 835 | struct gfs2_dirent *dent; |
836 | struct qstr name = { .name = "" }; | 836 | struct qstr name = { .name = "" }; |
837 | struct timespec tv = CURRENT_TIME; | ||
837 | 838 | ||
838 | error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); | 839 | error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); |
839 | if (error) | 840 | if (error) |
@@ -850,7 +851,11 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, | |||
850 | leaf->lf_entries = 0; | 851 | leaf->lf_entries = 0; |
851 | leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); | 852 | leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); |
852 | leaf->lf_next = 0; | 853 | leaf->lf_next = 0; |
853 | memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); | 854 | leaf->lf_inode = cpu_to_be64(ip->i_no_addr); |
855 | leaf->lf_dist = cpu_to_be32(1); | ||
856 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
857 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
858 | memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); | ||
854 | dent = (struct gfs2_dirent *)(leaf+1); | 859 | dent = (struct gfs2_dirent *)(leaf+1); |
855 | gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); | 860 | gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); |
856 | *pbh = bh; | 861 | *pbh = bh; |
@@ -1612,11 +1617,31 @@ out: | |||
1612 | return ret; | 1617 | return ret; |
1613 | } | 1618 | } |
1614 | 1619 | ||
1620 | /** | ||
1621 | * dir_new_leaf - Add a new leaf onto hash chain | ||
1622 | * @inode: The directory | ||
1623 | * @name: The name we are adding | ||
1624 | * | ||
1625 | * This adds a new dir leaf onto an existing leaf when there is not | ||
1626 | * enough space to add a new dir entry. This is a last resort after | ||
1627 | * we've expanded the hash table to max size and also split existing | ||
1628 | * leaf blocks, so it will only occur for very large directories. | ||
1629 | * | ||
1630 | * The dist parameter is set to 1 for leaf blocks directly attached | ||
1631 | * to the hash table, 2 for one layer of indirection, 3 for two layers | ||
1632 | * etc. We are thus able to tell the difference between an old leaf | ||
1633 | * with dist set to zero (i.e. "don't know") and a new one where we | ||
1634 | * set this information for debug/fsck purposes. | ||
1635 | * | ||
1636 | * Returns: 0 on success, or -ve on error | ||
1637 | */ | ||
1638 | |||
1615 | static int dir_new_leaf(struct inode *inode, const struct qstr *name) | 1639 | static int dir_new_leaf(struct inode *inode, const struct qstr *name) |
1616 | { | 1640 | { |
1617 | struct buffer_head *bh, *obh; | 1641 | struct buffer_head *bh, *obh; |
1618 | struct gfs2_inode *ip = GFS2_I(inode); | 1642 | struct gfs2_inode *ip = GFS2_I(inode); |
1619 | struct gfs2_leaf *leaf, *oleaf; | 1643 | struct gfs2_leaf *leaf, *oleaf; |
1644 | u32 dist = 1; | ||
1620 | int error; | 1645 | int error; |
1621 | u32 index; | 1646 | u32 index; |
1622 | u64 bn; | 1647 | u64 bn; |
@@ -1626,6 +1651,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1626 | if (error) | 1651 | if (error) |
1627 | return error; | 1652 | return error; |
1628 | do { | 1653 | do { |
1654 | dist++; | ||
1629 | oleaf = (struct gfs2_leaf *)obh->b_data; | 1655 | oleaf = (struct gfs2_leaf *)obh->b_data; |
1630 | bn = be64_to_cpu(oleaf->lf_next); | 1656 | bn = be64_to_cpu(oleaf->lf_next); |
1631 | if (!bn) | 1657 | if (!bn) |
@@ -1643,6 +1669,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1643 | brelse(obh); | 1669 | brelse(obh); |
1644 | return -ENOSPC; | 1670 | return -ENOSPC; |
1645 | } | 1671 | } |
1672 | leaf->lf_dist = cpu_to_be32(dist); | ||
1646 | oleaf->lf_next = cpu_to_be64(bh->b_blocknr); | 1673 | oleaf->lf_next = cpu_to_be64(bh->b_blocknr); |
1647 | brelse(bh); | 1674 | brelse(bh); |
1648 | brelse(obh); | 1675 | brelse(obh); |
@@ -1659,39 +1686,53 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1659 | 1686 | ||
1660 | /** | 1687 | /** |
1661 | * gfs2_dir_add - Add new filename into directory | 1688 | * gfs2_dir_add - Add new filename into directory |
1662 | * @dip: The GFS2 inode | 1689 | * @inode: The directory inode |
1663 | * @filename: The new name | 1690 | * @name: The new name |
1664 | * @inode: The inode number of the entry | 1691 | * @nip: The GFS2 inode to be linked in to the directory |
1665 | * @type: The type of the entry | 1692 | * @da: The directory addition info |
1693 | * | ||
1694 | * If the call to gfs2_diradd_alloc_required resulted in there being | ||
1695 | * no need to allocate any new directory blocks, then it will contain | ||
1696 | * a pointer to the directory entry and the bh in which it resides. We | ||
1697 | * can use that without having to repeat the search. If there was no | ||
1698 | * free space, then we must now create more space. | ||
1666 | * | 1699 | * |
1667 | * Returns: 0 on success, error code on failure | 1700 | * Returns: 0 on success, error code on failure |
1668 | */ | 1701 | */ |
1669 | 1702 | ||
1670 | int gfs2_dir_add(struct inode *inode, const struct qstr *name, | 1703 | int gfs2_dir_add(struct inode *inode, const struct qstr *name, |
1671 | const struct gfs2_inode *nip) | 1704 | const struct gfs2_inode *nip, struct gfs2_diradd *da) |
1672 | { | 1705 | { |
1673 | struct gfs2_inode *ip = GFS2_I(inode); | 1706 | struct gfs2_inode *ip = GFS2_I(inode); |
1674 | struct buffer_head *bh; | 1707 | struct buffer_head *bh = da->bh; |
1675 | struct gfs2_dirent *dent; | 1708 | struct gfs2_dirent *dent = da->dent; |
1709 | struct timespec tv; | ||
1676 | struct gfs2_leaf *leaf; | 1710 | struct gfs2_leaf *leaf; |
1677 | int error; | 1711 | int error; |
1678 | 1712 | ||
1679 | while(1) { | 1713 | while(1) { |
1680 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, | 1714 | if (da->bh == NULL) { |
1681 | &bh); | 1715 | dent = gfs2_dirent_search(inode, name, |
1716 | gfs2_dirent_find_space, &bh); | ||
1717 | } | ||
1682 | if (dent) { | 1718 | if (dent) { |
1683 | if (IS_ERR(dent)) | 1719 | if (IS_ERR(dent)) |
1684 | return PTR_ERR(dent); | 1720 | return PTR_ERR(dent); |
1685 | dent = gfs2_init_dirent(inode, dent, name, bh); | 1721 | dent = gfs2_init_dirent(inode, dent, name, bh); |
1686 | gfs2_inum_out(nip, dent); | 1722 | gfs2_inum_out(nip, dent); |
1687 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); | 1723 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); |
1724 | tv = CURRENT_TIME; | ||
1688 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { | 1725 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { |
1689 | leaf = (struct gfs2_leaf *)bh->b_data; | 1726 | leaf = (struct gfs2_leaf *)bh->b_data; |
1690 | be16_add_cpu(&leaf->lf_entries, 1); | 1727 | be16_add_cpu(&leaf->lf_entries, 1); |
1728 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
1729 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
1691 | } | 1730 | } |
1731 | da->dent = NULL; | ||
1732 | da->bh = NULL; | ||
1692 | brelse(bh); | 1733 | brelse(bh); |
1693 | ip->i_entries++; | 1734 | ip->i_entries++; |
1694 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1735 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; |
1695 | if (S_ISDIR(nip->i_inode.i_mode)) | 1736 | if (S_ISDIR(nip->i_inode.i_mode)) |
1696 | inc_nlink(&ip->i_inode); | 1737 | inc_nlink(&ip->i_inode); |
1697 | mark_inode_dirty(inode); | 1738 | mark_inode_dirty(inode); |
@@ -1742,6 +1783,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1742 | const struct qstr *name = &dentry->d_name; | 1783 | const struct qstr *name = &dentry->d_name; |
1743 | struct gfs2_dirent *dent, *prev = NULL; | 1784 | struct gfs2_dirent *dent, *prev = NULL; |
1744 | struct buffer_head *bh; | 1785 | struct buffer_head *bh; |
1786 | struct timespec tv = CURRENT_TIME; | ||
1745 | 1787 | ||
1746 | /* Returns _either_ the entry (if its first in block) or the | 1788 | /* Returns _either_ the entry (if its first in block) or the |
1747 | previous entry otherwise */ | 1789 | previous entry otherwise */ |
@@ -1767,13 +1809,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) | |||
1767 | if (!entries) | 1809 | if (!entries) |
1768 | gfs2_consist_inode(dip); | 1810 | gfs2_consist_inode(dip); |
1769 | leaf->lf_entries = cpu_to_be16(--entries); | 1811 | leaf->lf_entries = cpu_to_be16(--entries); |
1812 | leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); | ||
1813 | leaf->lf_sec = cpu_to_be64(tv.tv_sec); | ||
1770 | } | 1814 | } |
1771 | brelse(bh); | 1815 | brelse(bh); |
1772 | 1816 | ||
1773 | if (!dip->i_entries) | 1817 | if (!dip->i_entries) |
1774 | gfs2_consist_inode(dip); | 1818 | gfs2_consist_inode(dip); |
1775 | dip->i_entries--; | 1819 | dip->i_entries--; |
1776 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; | 1820 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; |
1777 | if (S_ISDIR(dentry->d_inode->i_mode)) | 1821 | if (S_ISDIR(dentry->d_inode->i_mode)) |
1778 | drop_nlink(&dip->i_inode); | 1822 | drop_nlink(&dip->i_inode); |
1779 | mark_inode_dirty(&dip->i_inode); | 1823 | mark_inode_dirty(&dip->i_inode); |
@@ -2017,22 +2061,36 @@ out: | |||
2017 | * gfs2_diradd_alloc_required - find if adding entry will require an allocation | 2061 | * gfs2_diradd_alloc_required - find if adding entry will require an allocation |
2018 | * @ip: the file being written to | 2062 | * @ip: the file being written to |
2019 | * @filname: the filename that's going to be added | 2063 | * @filname: the filename that's going to be added |
2064 | * @da: The structure to return dir alloc info | ||
2020 | * | 2065 | * |
2021 | * Returns: 1 if alloc required, 0 if not, -ve on error | 2066 | * Returns: 0 if ok, -ve on error |
2022 | */ | 2067 | */ |
2023 | 2068 | ||
2024 | int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) | 2069 | int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, |
2070 | struct gfs2_diradd *da) | ||
2025 | { | 2071 | { |
2072 | struct gfs2_inode *ip = GFS2_I(inode); | ||
2073 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
2074 | const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); | ||
2026 | struct gfs2_dirent *dent; | 2075 | struct gfs2_dirent *dent; |
2027 | struct buffer_head *bh; | 2076 | struct buffer_head *bh; |
2028 | 2077 | ||
2078 | da->nr_blocks = 0; | ||
2079 | da->bh = NULL; | ||
2080 | da->dent = NULL; | ||
2081 | |||
2029 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); | 2082 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); |
2030 | if (!dent) { | 2083 | if (!dent) { |
2031 | return 1; | 2084 | da->nr_blocks = sdp->sd_max_dirres; |
2085 | if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && | ||
2086 | (GFS2_DIRENT_SIZE(name->len) < extra)) | ||
2087 | da->nr_blocks = 1; | ||
2088 | return 0; | ||
2032 | } | 2089 | } |
2033 | if (IS_ERR(dent)) | 2090 | if (IS_ERR(dent)) |
2034 | return PTR_ERR(dent); | 2091 | return PTR_ERR(dent); |
2035 | brelse(bh); | 2092 | da->bh = bh; |
2093 | da->dent = dent; | ||
2036 | return 0; | 2094 | return 0; |
2037 | } | 2095 | } |
2038 | 2096 | ||
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f03bbd1873f..126c65dda028 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -16,6 +16,14 @@ | |||
16 | struct inode; | 16 | struct inode; |
17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
19 | struct buffer_head; | ||
20 | struct gfs2_dirent; | ||
21 | |||
22 | struct gfs2_diradd { | ||
23 | unsigned nr_blocks; | ||
24 | struct gfs2_dirent *dent; | ||
25 | struct buffer_head *bh; | ||
26 | }; | ||
19 | 27 | ||
20 | extern struct inode *gfs2_dir_search(struct inode *dir, | 28 | extern struct inode *gfs2_dir_search(struct inode *dir, |
21 | const struct qstr *filename, | 29 | const struct qstr *filename, |
@@ -23,7 +31,13 @@ extern struct inode *gfs2_dir_search(struct inode *dir, | |||
23 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 31 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
24 | const struct gfs2_inode *ip); | 32 | const struct gfs2_inode *ip); |
25 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 33 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
26 | const struct gfs2_inode *ip); | 34 | const struct gfs2_inode *ip, struct gfs2_diradd *da); |
35 | static inline void gfs2_dir_no_add(struct gfs2_diradd *da) | ||
36 | { | ||
37 | if (da->bh) | ||
38 | brelse(da->bh); | ||
39 | da->bh = NULL; | ||
40 | } | ||
27 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); | 41 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); |
28 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, | 42 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
29 | struct file_ra_state *f_ra); | 43 | struct file_ra_state *f_ra); |
@@ -33,7 +47,8 @@ extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | |||
33 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 47 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
34 | 48 | ||
35 | extern int gfs2_diradd_alloc_required(struct inode *dir, | 49 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
36 | const struct qstr *filename); | 50 | const struct qstr *filename, |
51 | struct gfs2_diradd *da); | ||
37 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 52 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
38 | struct buffer_head **bhp); | 53 | struct buffer_head **bhp); |
39 | extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); | 54 | extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c8420f7e4db6..ca0be6c69a26 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1552,13 +1552,11 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) | |||
1552 | glock_hash_walk(thaw_glock, sdp); | 1552 | glock_hash_walk(thaw_glock, sdp); |
1553 | } | 1553 | } |
1554 | 1554 | ||
1555 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | 1555 | static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) |
1556 | { | 1556 | { |
1557 | int ret; | ||
1558 | spin_lock(&gl->gl_spin); | 1557 | spin_lock(&gl->gl_spin); |
1559 | ret = gfs2_dump_glock(seq, gl); | 1558 | gfs2_dump_glock(seq, gl); |
1560 | spin_unlock(&gl->gl_spin); | 1559 | spin_unlock(&gl->gl_spin); |
1561 | return ret; | ||
1562 | } | 1560 | } |
1563 | 1561 | ||
1564 | static void dump_glock_func(struct gfs2_glock *gl) | 1562 | static void dump_glock_func(struct gfs2_glock *gl) |
@@ -1647,14 +1645,14 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1647 | * @seq: the seq_file struct | 1645 | * @seq: the seq_file struct |
1648 | * @gh: the glock holder | 1646 | * @gh: the glock holder |
1649 | * | 1647 | * |
1650 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
1651 | */ | 1648 | */ |
1652 | 1649 | ||
1653 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | 1650 | static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) |
1654 | { | 1651 | { |
1655 | struct task_struct *gh_owner = NULL; | 1652 | struct task_struct *gh_owner = NULL; |
1656 | char flags_buf[32]; | 1653 | char flags_buf[32]; |
1657 | 1654 | ||
1655 | rcu_read_lock(); | ||
1658 | if (gh->gh_owner_pid) | 1656 | if (gh->gh_owner_pid) |
1659 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1657 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
1660 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", | 1658 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", |
@@ -1664,7 +1662,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1664 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, | 1662 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
1665 | gh_owner ? gh_owner->comm : "(ended)", | 1663 | gh_owner ? gh_owner->comm : "(ended)", |
1666 | (void *)gh->gh_ip); | 1664 | (void *)gh->gh_ip); |
1667 | return 0; | 1665 | rcu_read_unlock(); |
1668 | } | 1666 | } |
1669 | 1667 | ||
1670 | static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | 1668 | static const char *gflags2str(char *buf, const struct gfs2_glock *gl) |
@@ -1719,16 +1717,14 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | |||
1719 | * example. The field's are n = number (id of the object), f = flags, | 1717 | * example. The field's are n = number (id of the object), f = flags, |
1720 | * t = type, s = state, r = refcount, e = error, p = pid. | 1718 | * t = type, s = state, r = refcount, e = error, p = pid. |
1721 | * | 1719 | * |
1722 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
1723 | */ | 1720 | */ |
1724 | 1721 | ||
1725 | int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) | 1722 | void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) |
1726 | { | 1723 | { |
1727 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1724 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1728 | unsigned long long dtime; | 1725 | unsigned long long dtime; |
1729 | const struct gfs2_holder *gh; | 1726 | const struct gfs2_holder *gh; |
1730 | char gflags_buf[32]; | 1727 | char gflags_buf[32]; |
1731 | int error = 0; | ||
1732 | 1728 | ||
1733 | dtime = jiffies - gl->gl_demote_time; | 1729 | dtime = jiffies - gl->gl_demote_time; |
1734 | dtime *= 1000000/HZ; /* demote time in uSec */ | 1730 | dtime *= 1000000/HZ; /* demote time in uSec */ |
@@ -1745,15 +1741,11 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) | |||
1745 | atomic_read(&gl->gl_revokes), | 1741 | atomic_read(&gl->gl_revokes), |
1746 | (int)gl->gl_lockref.count, gl->gl_hold_time); | 1742 | (int)gl->gl_lockref.count, gl->gl_hold_time); |
1747 | 1743 | ||
1748 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 1744 | list_for_each_entry(gh, &gl->gl_holders, gh_list) |
1749 | error = dump_holder(seq, gh); | 1745 | dump_holder(seq, gh); |
1750 | if (error) | 1746 | |
1751 | goto out; | ||
1752 | } | ||
1753 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) | 1747 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) |
1754 | error = glops->go_dump(seq, gl); | 1748 | glops->go_dump(seq, gl); |
1755 | out: | ||
1756 | return error; | ||
1757 | } | 1749 | } |
1758 | 1750 | ||
1759 | static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) | 1751 | static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) |
@@ -1951,7 +1943,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) | |||
1951 | 1943 | ||
1952 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) | 1944 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) |
1953 | { | 1945 | { |
1954 | return dump_glock(seq, iter_ptr); | 1946 | dump_glock(seq, iter_ptr); |
1947 | return 0; | ||
1955 | } | 1948 | } |
1956 | 1949 | ||
1957 | static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) | 1950 | static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 6647d77366ba..32572f71f027 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -199,7 +199,7 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, | |||
199 | struct gfs2_holder *gh); | 199 | struct gfs2_holder *gh); |
200 | extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 200 | extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
201 | extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 201 | extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
202 | extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); | 202 | extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
203 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) | 203 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) |
204 | extern __printf(2, 3) | 204 | extern __printf(2, 3) |
205 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 205 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index db908f697139..3bf0631b5d56 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -133,7 +133,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
133 | 133 | ||
134 | static void rgrp_go_sync(struct gfs2_glock *gl) | 134 | static void rgrp_go_sync(struct gfs2_glock *gl) |
135 | { | 135 | { |
136 | struct address_space *metamapping = gfs2_glock2aspace(gl); | 136 | struct gfs2_sbd *sdp = gl->gl_sbd; |
137 | struct address_space *mapping = &sdp->sd_aspace; | ||
137 | struct gfs2_rgrpd *rgd; | 138 | struct gfs2_rgrpd *rgd; |
138 | int error; | 139 | int error; |
139 | 140 | ||
@@ -141,10 +142,10 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
141 | return; | 142 | return; |
142 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); | 143 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
143 | 144 | ||
144 | gfs2_log_flush(gl->gl_sbd, gl); | 145 | gfs2_log_flush(sdp, gl); |
145 | filemap_fdatawrite(metamapping); | 146 | filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
146 | error = filemap_fdatawait(metamapping); | 147 | error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
147 | mapping_set_error(metamapping, error); | 148 | mapping_set_error(mapping, error); |
148 | gfs2_ail_empty_gl(gl); | 149 | gfs2_ail_empty_gl(gl); |
149 | 150 | ||
150 | spin_lock(&gl->gl_spin); | 151 | spin_lock(&gl->gl_spin); |
@@ -166,11 +167,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
166 | 167 | ||
167 | static void rgrp_go_inval(struct gfs2_glock *gl, int flags) | 168 | static void rgrp_go_inval(struct gfs2_glock *gl, int flags) |
168 | { | 169 | { |
169 | struct address_space *mapping = gfs2_glock2aspace(gl); | 170 | struct gfs2_sbd *sdp = gl->gl_sbd; |
171 | struct address_space *mapping = &sdp->sd_aspace; | ||
170 | 172 | ||
171 | WARN_ON_ONCE(!(flags & DIO_METADATA)); | 173 | WARN_ON_ONCE(!(flags & DIO_METADATA)); |
172 | gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); | 174 | gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); |
173 | truncate_inode_pages(mapping, 0); | 175 | truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); |
174 | 176 | ||
175 | if (gl->gl_object) { | 177 | if (gl->gl_object) { |
176 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; | 178 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; |
@@ -192,8 +194,11 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
192 | 194 | ||
193 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | 195 | if (ip && !S_ISREG(ip->i_inode.i_mode)) |
194 | ip = NULL; | 196 | ip = NULL; |
195 | if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | 197 | if (ip) { |
196 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | 198 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) |
199 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | ||
200 | inode_dio_wait(&ip->i_inode); | ||
201 | } | ||
197 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 202 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
198 | return; | 203 | return; |
199 | 204 | ||
@@ -410,6 +415,9 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
410 | return error; | 415 | return error; |
411 | } | 416 | } |
412 | 417 | ||
418 | if (gh->gh_state != LM_ST_DEFERRED) | ||
419 | inode_dio_wait(&ip->i_inode); | ||
420 | |||
413 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && | 421 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && |
414 | (gl->gl_state == LM_ST_EXCLUSIVE) && | 422 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
415 | (gh->gh_state == LM_ST_EXCLUSIVE)) { | 423 | (gh->gh_state == LM_ST_EXCLUSIVE)) { |
@@ -429,21 +437,19 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
429 | * @seq: The iterator | 437 | * @seq: The iterator |
430 | * @ip: the inode | 438 | * @ip: the inode |
431 | * | 439 | * |
432 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
433 | */ | 440 | */ |
434 | 441 | ||
435 | static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | 442 | static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) |
436 | { | 443 | { |
437 | const struct gfs2_inode *ip = gl->gl_object; | 444 | const struct gfs2_inode *ip = gl->gl_object; |
438 | if (ip == NULL) | 445 | if (ip == NULL) |
439 | return 0; | 446 | return; |
440 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", | 447 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
441 | (unsigned long long)ip->i_no_formal_ino, | 448 | (unsigned long long)ip->i_no_formal_ino, |
442 | (unsigned long long)ip->i_no_addr, | 449 | (unsigned long long)ip->i_no_addr, |
443 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 450 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
444 | (unsigned int)ip->i_diskflags, | 451 | (unsigned int)ip->i_diskflags, |
445 | (unsigned long long)i_size_read(&ip->i_inode)); | 452 | (unsigned long long)i_size_read(&ip->i_inode)); |
446 | return 0; | ||
447 | } | 453 | } |
448 | 454 | ||
449 | /** | 455 | /** |
@@ -552,7 +558,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
552 | .go_unlock = gfs2_rgrp_go_unlock, | 558 | .go_unlock = gfs2_rgrp_go_unlock, |
553 | .go_dump = gfs2_rgrp_dump, | 559 | .go_dump = gfs2_rgrp_dump, |
554 | .go_type = LM_TYPE_RGRP, | 560 | .go_type = LM_TYPE_RGRP, |
555 | .go_flags = GLOF_ASPACE | GLOF_LVB, | 561 | .go_flags = GLOF_LVB, |
556 | }; | 562 | }; |
557 | 563 | ||
558 | const struct gfs2_glock_operations gfs2_trans_glops = { | 564 | const struct gfs2_glock_operations gfs2_trans_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ba1ea67f4eeb..cf0e34400f71 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -93,6 +93,7 @@ struct gfs2_rgrpd { | |||
93 | struct gfs2_rgrp_lvb *rd_rgl; | 93 | struct gfs2_rgrp_lvb *rd_rgl; |
94 | u32 rd_last_alloc; | 94 | u32 rd_last_alloc; |
95 | u32 rd_flags; | 95 | u32 rd_flags; |
96 | u32 rd_extfail_pt; /* extent failure point */ | ||
96 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ | 97 | #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ |
97 | #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ | 98 | #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ |
98 | #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ | 99 | #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ |
@@ -217,7 +218,7 @@ struct gfs2_glock_operations { | |||
217 | int (*go_demote_ok) (const struct gfs2_glock *gl); | 218 | int (*go_demote_ok) (const struct gfs2_glock *gl); |
218 | int (*go_lock) (struct gfs2_holder *gh); | 219 | int (*go_lock) (struct gfs2_holder *gh); |
219 | void (*go_unlock) (struct gfs2_holder *gh); | 220 | void (*go_unlock) (struct gfs2_holder *gh); |
220 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); | 221 | void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); |
221 | void (*go_callback)(struct gfs2_glock *gl, bool remote); | 222 | void (*go_callback)(struct gfs2_glock *gl, bool remote); |
222 | const int go_type; | 223 | const int go_type; |
223 | const unsigned long go_flags; | 224 | const unsigned long go_flags; |
@@ -350,7 +351,15 @@ struct gfs2_glock { | |||
350 | atomic_t gl_ail_count; | 351 | atomic_t gl_ail_count; |
351 | atomic_t gl_revokes; | 352 | atomic_t gl_revokes; |
352 | struct delayed_work gl_work; | 353 | struct delayed_work gl_work; |
353 | struct work_struct gl_delete; | 354 | union { |
355 | /* For inode and iopen glocks only */ | ||
356 | struct work_struct gl_delete; | ||
357 | /* For rgrp glocks only */ | ||
358 | struct { | ||
359 | loff_t start; | ||
360 | loff_t end; | ||
361 | } gl_vm; | ||
362 | }; | ||
354 | struct rcu_head gl_rcu; | 363 | struct rcu_head gl_rcu; |
355 | }; | 364 | }; |
356 | 365 | ||
@@ -419,10 +428,13 @@ enum { | |||
419 | }; | 428 | }; |
420 | 429 | ||
421 | struct gfs2_quota_data { | 430 | struct gfs2_quota_data { |
431 | struct hlist_bl_node qd_hlist; | ||
422 | struct list_head qd_list; | 432 | struct list_head qd_list; |
423 | struct kqid qd_id; | 433 | struct kqid qd_id; |
434 | struct gfs2_sbd *qd_sbd; | ||
424 | struct lockref qd_lockref; | 435 | struct lockref qd_lockref; |
425 | struct list_head qd_lru; | 436 | struct list_head qd_lru; |
437 | unsigned qd_hash; | ||
426 | 438 | ||
427 | unsigned long qd_flags; /* QDF_... */ | 439 | unsigned long qd_flags; /* QDF_... */ |
428 | 440 | ||
@@ -441,6 +453,7 @@ struct gfs2_quota_data { | |||
441 | 453 | ||
442 | u64 qd_sync_gen; | 454 | u64 qd_sync_gen; |
443 | unsigned long qd_last_warn; | 455 | unsigned long qd_last_warn; |
456 | struct rcu_head qd_rcu; | ||
444 | }; | 457 | }; |
445 | 458 | ||
446 | struct gfs2_trans { | 459 | struct gfs2_trans { |
@@ -720,13 +733,15 @@ struct gfs2_sbd { | |||
720 | spinlock_t sd_trunc_lock; | 733 | spinlock_t sd_trunc_lock; |
721 | 734 | ||
722 | unsigned int sd_quota_slots; | 735 | unsigned int sd_quota_slots; |
723 | unsigned int sd_quota_chunks; | 736 | unsigned long *sd_quota_bitmap; |
724 | unsigned char **sd_quota_bitmap; | 737 | spinlock_t sd_bitmap_lock; |
725 | 738 | ||
726 | u64 sd_quota_sync_gen; | 739 | u64 sd_quota_sync_gen; |
727 | 740 | ||
728 | /* Log stuff */ | 741 | /* Log stuff */ |
729 | 742 | ||
743 | struct address_space sd_aspace; | ||
744 | |||
730 | spinlock_t sd_log_lock; | 745 | spinlock_t sd_log_lock; |
731 | 746 | ||
732 | struct gfs2_trans *sd_log_tr; | 747 | struct gfs2_trans *sd_log_tr; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7119504159f1..890588c7fb33 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -149,7 +149,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, | |||
149 | ip = GFS2_I(inode); | 149 | ip = GFS2_I(inode); |
150 | 150 | ||
151 | if (!inode) | 151 | if (!inode) |
152 | return ERR_PTR(-ENOBUFS); | 152 | return ERR_PTR(-ENOMEM); |
153 | 153 | ||
154 | if (inode->i_state & I_NEW) { | 154 | if (inode->i_state & I_NEW) { |
155 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 155 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -469,14 +469,36 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
469 | brelse(dibh); | 469 | brelse(dibh); |
470 | } | 470 | } |
471 | 471 | ||
472 | /** | ||
473 | * gfs2_trans_da_blocks - Calculate number of blocks to link inode | ||
474 | * @dip: The directory we are linking into | ||
475 | * @da: The dir add information | ||
476 | * @nr_inodes: The number of inodes involved | ||
477 | * | ||
478 | * This calculate the number of blocks we need to reserve in a | ||
479 | * transaction to link @nr_inodes into a directory. In most cases | ||
480 | * @nr_inodes will be 2 (the directory plus the inode being linked in) | ||
481 | * but in case of rename, 4 may be required. | ||
482 | * | ||
483 | * Returns: Number of blocks | ||
484 | */ | ||
485 | |||
486 | static unsigned gfs2_trans_da_blks(const struct gfs2_inode *dip, | ||
487 | const struct gfs2_diradd *da, | ||
488 | unsigned nr_inodes) | ||
489 | { | ||
490 | return da->nr_blocks + gfs2_rg_blocks(dip, da->nr_blocks) + | ||
491 | (nr_inodes * RES_DINODE) + RES_QUOTA + RES_STATFS; | ||
492 | } | ||
493 | |||
472 | static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | 494 | static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, |
473 | struct gfs2_inode *ip, int arq) | 495 | struct gfs2_inode *ip, struct gfs2_diradd *da) |
474 | { | 496 | { |
475 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 497 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
476 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 498 | struct gfs2_alloc_parms ap = { .target = da->nr_blocks, }; |
477 | int error; | 499 | int error; |
478 | 500 | ||
479 | if (arq) { | 501 | if (da->nr_blocks) { |
480 | error = gfs2_quota_lock_check(dip); | 502 | error = gfs2_quota_lock_check(dip); |
481 | if (error) | 503 | if (error) |
482 | goto fail_quota_locks; | 504 | goto fail_quota_locks; |
@@ -485,10 +507,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
485 | if (error) | 507 | if (error) |
486 | goto fail_quota_locks; | 508 | goto fail_quota_locks; |
487 | 509 | ||
488 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 510 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, da, 2), 0); |
489 | dip->i_rgd->rd_length + | ||
490 | 2 * RES_DINODE + | ||
491 | RES_STATFS + RES_QUOTA, 0); | ||
492 | if (error) | 511 | if (error) |
493 | goto fail_ipreserv; | 512 | goto fail_ipreserv; |
494 | } else { | 513 | } else { |
@@ -497,7 +516,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
497 | goto fail_quota_locks; | 516 | goto fail_quota_locks; |
498 | } | 517 | } |
499 | 518 | ||
500 | error = gfs2_dir_add(&dip->i_inode, name, ip); | 519 | error = gfs2_dir_add(&dip->i_inode, name, ip, da); |
501 | if (error) | 520 | if (error) |
502 | goto fail_end_trans; | 521 | goto fail_end_trans; |
503 | 522 | ||
@@ -560,7 +579,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
560 | struct dentry *d; | 579 | struct dentry *d; |
561 | int error; | 580 | int error; |
562 | u32 aflags = 0; | 581 | u32 aflags = 0; |
563 | int arq; | 582 | struct gfs2_diradd da = { .bh = NULL, }; |
564 | 583 | ||
565 | if (!name->len || name->len > GFS2_FNAMESIZE) | 584 | if (!name->len || name->len > GFS2_FNAMESIZE) |
566 | return -ENAMETOOLONG; | 585 | return -ENAMETOOLONG; |
@@ -585,6 +604,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
585 | error = PTR_ERR(inode); | 604 | error = PTR_ERR(inode); |
586 | if (!IS_ERR(inode)) { | 605 | if (!IS_ERR(inode)) { |
587 | d = d_splice_alias(inode, dentry); | 606 | d = d_splice_alias(inode, dentry); |
607 | error = PTR_ERR(d); | ||
608 | if (IS_ERR(d)) | ||
609 | goto fail_gunlock; | ||
588 | error = 0; | 610 | error = 0; |
589 | if (file) { | 611 | if (file) { |
590 | if (S_ISREG(inode->i_mode)) { | 612 | if (S_ISREG(inode->i_mode)) { |
@@ -602,7 +624,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
602 | goto fail_gunlock; | 624 | goto fail_gunlock; |
603 | } | 625 | } |
604 | 626 | ||
605 | arq = error = gfs2_diradd_alloc_required(dir, name); | 627 | error = gfs2_diradd_alloc_required(dir, name, &da); |
606 | if (error < 0) | 628 | if (error < 0) |
607 | goto fail_gunlock; | 629 | goto fail_gunlock; |
608 | 630 | ||
@@ -690,7 +712,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
690 | if (error) | 712 | if (error) |
691 | goto fail_gunlock3; | 713 | goto fail_gunlock3; |
692 | 714 | ||
693 | error = link_dinode(dip, name, ip, arq); | 715 | error = link_dinode(dip, name, ip, &da); |
694 | if (error) | 716 | if (error) |
695 | goto fail_gunlock3; | 717 | goto fail_gunlock3; |
696 | 718 | ||
@@ -719,6 +741,7 @@ fail_free_inode: | |||
719 | free_inode_nonrcu(inode); | 741 | free_inode_nonrcu(inode); |
720 | inode = NULL; | 742 | inode = NULL; |
721 | fail_gunlock: | 743 | fail_gunlock: |
744 | gfs2_dir_no_add(&da); | ||
722 | gfs2_glock_dq_uninit(ghs); | 745 | gfs2_glock_dq_uninit(ghs); |
723 | if (inode && !IS_ERR(inode)) { | 746 | if (inode && !IS_ERR(inode)) { |
724 | clear_nlink(inode); | 747 | clear_nlink(inode); |
@@ -779,6 +802,11 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
779 | } | 802 | } |
780 | 803 | ||
781 | d = d_splice_alias(inode, dentry); | 804 | d = d_splice_alias(inode, dentry); |
805 | if (IS_ERR(d)) { | ||
806 | iput(inode); | ||
807 | gfs2_glock_dq_uninit(&gh); | ||
808 | return d; | ||
809 | } | ||
782 | if (file && S_ISREG(inode->i_mode)) | 810 | if (file && S_ISREG(inode->i_mode)) |
783 | error = finish_open(file, dentry, gfs2_open_common, opened); | 811 | error = finish_open(file, dentry, gfs2_open_common, opened); |
784 | 812 | ||
@@ -817,7 +845,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
817 | struct gfs2_inode *ip = GFS2_I(inode); | 845 | struct gfs2_inode *ip = GFS2_I(inode); |
818 | struct gfs2_holder ghs[2]; | 846 | struct gfs2_holder ghs[2]; |
819 | struct buffer_head *dibh; | 847 | struct buffer_head *dibh; |
820 | int alloc_required; | 848 | struct gfs2_diradd da = { .bh = NULL, }; |
821 | int error; | 849 | int error; |
822 | 850 | ||
823 | if (S_ISDIR(inode->i_mode)) | 851 | if (S_ISDIR(inode->i_mode)) |
@@ -872,13 +900,12 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
872 | if (ip->i_inode.i_nlink == (u32)-1) | 900 | if (ip->i_inode.i_nlink == (u32)-1) |
873 | goto out_gunlock; | 901 | goto out_gunlock; |
874 | 902 | ||
875 | alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); | 903 | error = gfs2_diradd_alloc_required(dir, &dentry->d_name, &da); |
876 | if (error < 0) | 904 | if (error < 0) |
877 | goto out_gunlock; | 905 | goto out_gunlock; |
878 | error = 0; | ||
879 | 906 | ||
880 | if (alloc_required) { | 907 | if (da.nr_blocks) { |
881 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 908 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
882 | error = gfs2_quota_lock_check(dip); | 909 | error = gfs2_quota_lock_check(dip); |
883 | if (error) | 910 | if (error) |
884 | goto out_gunlock; | 911 | goto out_gunlock; |
@@ -887,10 +914,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
887 | if (error) | 914 | if (error) |
888 | goto out_gunlock_q; | 915 | goto out_gunlock_q; |
889 | 916 | ||
890 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 917 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, &da, 2), 0); |
891 | gfs2_rg_blocks(dip, sdp->sd_max_dirres) + | ||
892 | 2 * RES_DINODE + RES_STATFS + | ||
893 | RES_QUOTA, 0); | ||
894 | if (error) | 918 | if (error) |
895 | goto out_ipres; | 919 | goto out_ipres; |
896 | } else { | 920 | } else { |
@@ -903,7 +927,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
903 | if (error) | 927 | if (error) |
904 | goto out_end_trans; | 928 | goto out_end_trans; |
905 | 929 | ||
906 | error = gfs2_dir_add(dir, &dentry->d_name, ip); | 930 | error = gfs2_dir_add(dir, &dentry->d_name, ip, &da); |
907 | if (error) | 931 | if (error) |
908 | goto out_brelse; | 932 | goto out_brelse; |
909 | 933 | ||
@@ -919,12 +943,13 @@ out_brelse: | |||
919 | out_end_trans: | 943 | out_end_trans: |
920 | gfs2_trans_end(sdp); | 944 | gfs2_trans_end(sdp); |
921 | out_ipres: | 945 | out_ipres: |
922 | if (alloc_required) | 946 | if (da.nr_blocks) |
923 | gfs2_inplace_release(dip); | 947 | gfs2_inplace_release(dip); |
924 | out_gunlock_q: | 948 | out_gunlock_q: |
925 | if (alloc_required) | 949 | if (da.nr_blocks) |
926 | gfs2_quota_unlock(dip); | 950 | gfs2_quota_unlock(dip); |
927 | out_gunlock: | 951 | out_gunlock: |
952 | gfs2_dir_no_add(&da); | ||
928 | gfs2_glock_dq(ghs + 1); | 953 | gfs2_glock_dq(ghs + 1); |
929 | out_child: | 954 | out_child: |
930 | gfs2_glock_dq(ghs); | 955 | gfs2_glock_dq(ghs); |
@@ -1254,7 +1279,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1254 | struct gfs2_rgrpd *nrgd; | 1279 | struct gfs2_rgrpd *nrgd; |
1255 | unsigned int num_gh; | 1280 | unsigned int num_gh; |
1256 | int dir_rename = 0; | 1281 | int dir_rename = 0; |
1257 | int alloc_required = 0; | 1282 | struct gfs2_diradd da = { .nr_blocks = 0, }; |
1258 | unsigned int x; | 1283 | unsigned int x; |
1259 | int error; | 1284 | int error; |
1260 | 1285 | ||
@@ -1388,14 +1413,14 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1388 | goto out_gunlock; | 1413 | goto out_gunlock; |
1389 | } | 1414 | } |
1390 | 1415 | ||
1391 | if (nip == NULL) | 1416 | if (nip == NULL) { |
1392 | alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); | 1417 | error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name, &da); |
1393 | error = alloc_required; | 1418 | if (error) |
1394 | if (error < 0) | 1419 | goto out_gunlock; |
1395 | goto out_gunlock; | 1420 | } |
1396 | 1421 | ||
1397 | if (alloc_required) { | 1422 | if (da.nr_blocks) { |
1398 | struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; | 1423 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
1399 | error = gfs2_quota_lock_check(ndip); | 1424 | error = gfs2_quota_lock_check(ndip); |
1400 | if (error) | 1425 | if (error) |
1401 | goto out_gunlock; | 1426 | goto out_gunlock; |
@@ -1404,10 +1429,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1404 | if (error) | 1429 | if (error) |
1405 | goto out_gunlock_q; | 1430 | goto out_gunlock_q; |
1406 | 1431 | ||
1407 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 1432 | error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(ndip, &da, 4) + |
1408 | gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + | 1433 | 4 * RES_LEAF + 4, 0); |
1409 | 4 * RES_DINODE + 4 * RES_LEAF + | ||
1410 | RES_STATFS + RES_QUOTA + 4, 0); | ||
1411 | if (error) | 1434 | if (error) |
1412 | goto out_ipreserv; | 1435 | goto out_ipreserv; |
1413 | } else { | 1436 | } else { |
@@ -1441,19 +1464,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1441 | if (error) | 1464 | if (error) |
1442 | goto out_end_trans; | 1465 | goto out_end_trans; |
1443 | 1466 | ||
1444 | error = gfs2_dir_add(ndir, &ndentry->d_name, ip); | 1467 | error = gfs2_dir_add(ndir, &ndentry->d_name, ip, &da); |
1445 | if (error) | 1468 | if (error) |
1446 | goto out_end_trans; | 1469 | goto out_end_trans; |
1447 | 1470 | ||
1448 | out_end_trans: | 1471 | out_end_trans: |
1449 | gfs2_trans_end(sdp); | 1472 | gfs2_trans_end(sdp); |
1450 | out_ipreserv: | 1473 | out_ipreserv: |
1451 | if (alloc_required) | 1474 | if (da.nr_blocks) |
1452 | gfs2_inplace_release(ndip); | 1475 | gfs2_inplace_release(ndip); |
1453 | out_gunlock_q: | 1476 | out_gunlock_q: |
1454 | if (alloc_required) | 1477 | if (da.nr_blocks) |
1455 | gfs2_quota_unlock(ndip); | 1478 | gfs2_quota_unlock(ndip); |
1456 | out_gunlock: | 1479 | out_gunlock: |
1480 | gfs2_dir_no_add(&da); | ||
1457 | while (x--) { | 1481 | while (x--) { |
1458 | gfs2_glock_dq(ghs + x); | 1482 | gfs2_glock_dq(ghs + x); |
1459 | gfs2_holder_uninit(ghs + x); | 1483 | gfs2_holder_uninit(ghs + x); |
@@ -1607,10 +1631,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1607 | if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) | 1631 | if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) |
1608 | ogid = ngid = NO_GID_QUOTA_CHANGE; | 1632 | ogid = ngid = NO_GID_QUOTA_CHANGE; |
1609 | 1633 | ||
1610 | error = gfs2_quota_lock(ip, nuid, ngid); | 1634 | error = get_write_access(inode); |
1611 | if (error) | 1635 | if (error) |
1612 | return error; | 1636 | return error; |
1613 | 1637 | ||
1638 | error = gfs2_rs_alloc(ip); | ||
1639 | if (error) | ||
1640 | goto out; | ||
1641 | |||
1642 | error = gfs2_rindex_update(sdp); | ||
1643 | if (error) | ||
1644 | goto out; | ||
1645 | |||
1646 | error = gfs2_quota_lock(ip, nuid, ngid); | ||
1647 | if (error) | ||
1648 | goto out; | ||
1649 | |||
1614 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || | 1650 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || |
1615 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { | 1651 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { |
1616 | error = gfs2_quota_check(ip, nuid, ngid); | 1652 | error = gfs2_quota_check(ip, nuid, ngid); |
@@ -1637,6 +1673,8 @@ out_end_trans: | |||
1637 | gfs2_trans_end(sdp); | 1673 | gfs2_trans_end(sdp); |
1638 | out_gunlock_q: | 1674 | out_gunlock_q: |
1639 | gfs2_quota_unlock(ip); | 1675 | gfs2_quota_unlock(ip); |
1676 | out: | ||
1677 | put_write_access(inode); | ||
1640 | return error; | 1678 | return error; |
1641 | } | 1679 | } |
1642 | 1680 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 610613fb65b5..9dcb9777a5f8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
551 | struct buffer_head *bh = bd->bd_bh; | 551 | struct buffer_head *bh = bd->bd_bh; |
552 | struct gfs2_glock *gl = bd->bd_gl; | 552 | struct gfs2_glock *gl = bd->bd_gl; |
553 | 553 | ||
554 | gfs2_remove_from_ail(bd); | ||
555 | bd->bd_bh = NULL; | ||
556 | bh->b_private = NULL; | 554 | bh->b_private = NULL; |
557 | bd->bd_blkno = bh->b_blocknr; | 555 | bd->bd_blkno = bh->b_blocknr; |
556 | gfs2_remove_from_ail(bd); /* drops ref on bh */ | ||
557 | bd->bd_bh = NULL; | ||
558 | bd->bd_ops = &gfs2_revoke_lops; | 558 | bd->bd_ops = &gfs2_revoke_lops; |
559 | sdp->sd_log_num_revoke++; | 559 | sdp->sd_log_num_revoke++; |
560 | atomic_inc(&gl->gl_revokes); | 560 | atomic_inc(&gl->gl_revokes); |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 010b9fb9fec6..58f06400b7b8 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -83,6 +83,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) | |||
83 | bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); | 83 | bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); |
84 | clear_bit(GBF_FULL, &bi->bi_flags); | 84 | clear_bit(GBF_FULL, &bi->bi_flags); |
85 | rgd->rd_free_clone = rgd->rd_free; | 85 | rgd->rd_free_clone = rgd->rd_free; |
86 | rgd->rd_extfail_pt = rgd->rd_free; | ||
86 | } | 87 | } |
87 | 88 | ||
88 | /** | 89 | /** |
@@ -588,8 +589,12 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
588 | static void gfs2_meta_sync(struct gfs2_glock *gl) | 589 | static void gfs2_meta_sync(struct gfs2_glock *gl) |
589 | { | 590 | { |
590 | struct address_space *mapping = gfs2_glock2aspace(gl); | 591 | struct address_space *mapping = gfs2_glock2aspace(gl); |
592 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
591 | int error; | 593 | int error; |
592 | 594 | ||
595 | if (mapping == NULL) | ||
596 | mapping = &sdp->sd_aspace; | ||
597 | |||
593 | filemap_fdatawrite(mapping); | 598 | filemap_fdatawrite(mapping); |
594 | error = filemap_fdatawait(mapping); | 599 | error = filemap_fdatawait(mapping); |
595 | 600 | ||
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 0650db2541ef..c272e73063de 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -76,6 +76,7 @@ static int __init init_gfs2_fs(void) | |||
76 | 76 | ||
77 | gfs2_str2qstr(&gfs2_qdot, "."); | 77 | gfs2_str2qstr(&gfs2_qdot, "."); |
78 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | 78 | gfs2_str2qstr(&gfs2_qdotdot, ".."); |
79 | gfs2_quota_hash_init(); | ||
79 | 80 | ||
80 | error = gfs2_sys_init(); | 81 | error = gfs2_sys_init(); |
81 | if (error) | 82 | if (error) |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 932415050540..c7f24690ed05 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -116,6 +116,9 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) | |||
116 | unsigned long index; | 116 | unsigned long index; |
117 | unsigned int bufnum; | 117 | unsigned int bufnum; |
118 | 118 | ||
119 | if (mapping == NULL) | ||
120 | mapping = &sdp->sd_aspace; | ||
121 | |||
119 | shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; | 122 | shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; |
120 | index = blkno >> shift; /* convert block to page */ | 123 | index = blkno >> shift; /* convert block to page */ |
121 | bufnum = blkno - (index << shift); /* block buf index within page */ | 124 | bufnum = blkno - (index << shift); /* block buf index within page */ |
@@ -258,6 +261,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
258 | struct address_space *mapping = bh->b_page->mapping; | 261 | struct address_space *mapping = bh->b_page->mapping; |
259 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); | 262 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); |
260 | struct gfs2_bufdata *bd = bh->b_private; | 263 | struct gfs2_bufdata *bd = bh->b_private; |
264 | int was_pinned = 0; | ||
261 | 265 | ||
262 | if (test_clear_buffer_pinned(bh)) { | 266 | if (test_clear_buffer_pinned(bh)) { |
263 | trace_gfs2_pin(bd, 0); | 267 | trace_gfs2_pin(bd, 0); |
@@ -273,12 +277,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
273 | tr->tr_num_databuf_rm++; | 277 | tr->tr_num_databuf_rm++; |
274 | } | 278 | } |
275 | tr->tr_touched = 1; | 279 | tr->tr_touched = 1; |
280 | was_pinned = 1; | ||
276 | brelse(bh); | 281 | brelse(bh); |
277 | } | 282 | } |
278 | if (bd) { | 283 | if (bd) { |
279 | spin_lock(&sdp->sd_ail_lock); | 284 | spin_lock(&sdp->sd_ail_lock); |
280 | if (bd->bd_tr) { | 285 | if (bd->bd_tr) { |
281 | gfs2_trans_add_revoke(sdp, bd); | 286 | gfs2_trans_add_revoke(sdp, bd); |
287 | } else if (was_pinned) { | ||
288 | bh->b_private = NULL; | ||
289 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
282 | } | 290 | } |
283 | spin_unlock(&sdp->sd_ail_lock); | 291 | spin_unlock(&sdp->sd_ail_lock); |
284 | } | 292 | } |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 82303b474958..1e712b566d76 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "log.h" | 36 | #include "log.h" |
37 | #include "quota.h" | 37 | #include "quota.h" |
38 | #include "dir.h" | 38 | #include "dir.h" |
39 | #include "meta_io.h" | ||
39 | #include "trace_gfs2.h" | 40 | #include "trace_gfs2.h" |
40 | 41 | ||
41 | #define DO 0 | 42 | #define DO 0 |
@@ -62,6 +63,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) | |||
62 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 63 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
63 | { | 64 | { |
64 | struct gfs2_sbd *sdp; | 65 | struct gfs2_sbd *sdp; |
66 | struct address_space *mapping; | ||
65 | 67 | ||
66 | sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); | 68 | sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); |
67 | if (!sdp) | 69 | if (!sdp) |
@@ -97,6 +99,18 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
97 | init_waitqueue_head(&sdp->sd_quota_wait); | 99 | init_waitqueue_head(&sdp->sd_quota_wait); |
98 | INIT_LIST_HEAD(&sdp->sd_trunc_list); | 100 | INIT_LIST_HEAD(&sdp->sd_trunc_list); |
99 | spin_lock_init(&sdp->sd_trunc_lock); | 101 | spin_lock_init(&sdp->sd_trunc_lock); |
102 | spin_lock_init(&sdp->sd_bitmap_lock); | ||
103 | |||
104 | mapping = &sdp->sd_aspace; | ||
105 | |||
106 | address_space_init_once(mapping); | ||
107 | mapping->a_ops = &gfs2_meta_aops; | ||
108 | mapping->host = sb->s_bdev->bd_inode; | ||
109 | mapping->flags = 0; | ||
110 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
111 | mapping->private_data = NULL; | ||
112 | mapping->backing_dev_info = sb->s_bdi; | ||
113 | mapping->writeback_index = 0; | ||
100 | 114 | ||
101 | spin_lock_init(&sdp->sd_log_lock); | 115 | spin_lock_init(&sdp->sd_log_lock); |
102 | atomic_set(&sdp->sd_log_pinned, 0); | 116 | atomic_set(&sdp->sd_log_pinned, 0); |
@@ -217,7 +231,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) | |||
217 | 231 | ||
218 | page = alloc_page(GFP_NOFS); | 232 | page = alloc_page(GFP_NOFS); |
219 | if (unlikely(!page)) | 233 | if (unlikely(!page)) |
220 | return -ENOBUFS; | 234 | return -ENOMEM; |
221 | 235 | ||
222 | ClearPageUptodate(page); | 236 | ClearPageUptodate(page); |
223 | ClearPageDirty(page); | 237 | ClearPageDirty(page); |
@@ -956,40 +970,6 @@ fail: | |||
956 | return error; | 970 | return error; |
957 | } | 971 | } |
958 | 972 | ||
959 | static int init_threads(struct gfs2_sbd *sdp, int undo) | ||
960 | { | ||
961 | struct task_struct *p; | ||
962 | int error = 0; | ||
963 | |||
964 | if (undo) | ||
965 | goto fail_quotad; | ||
966 | |||
967 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | ||
968 | if (IS_ERR(p)) { | ||
969 | error = PTR_ERR(p); | ||
970 | fs_err(sdp, "can't start logd thread: %d\n", error); | ||
971 | return error; | ||
972 | } | ||
973 | sdp->sd_logd_process = p; | ||
974 | |||
975 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | ||
976 | if (IS_ERR(p)) { | ||
977 | error = PTR_ERR(p); | ||
978 | fs_err(sdp, "can't start quotad thread: %d\n", error); | ||
979 | goto fail; | ||
980 | } | ||
981 | sdp->sd_quotad_process = p; | ||
982 | |||
983 | return 0; | ||
984 | |||
985 | |||
986 | fail_quotad: | ||
987 | kthread_stop(sdp->sd_quotad_process); | ||
988 | fail: | ||
989 | kthread_stop(sdp->sd_logd_process); | ||
990 | return error; | ||
991 | } | ||
992 | |||
993 | static const match_table_t nolock_tokens = { | 973 | static const match_table_t nolock_tokens = { |
994 | { Opt_jid, "jid=%d\n", }, | 974 | { Opt_jid, "jid=%d\n", }, |
995 | { Opt_err, NULL }, | 975 | { Opt_err, NULL }, |
@@ -1254,15 +1234,11 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1254 | goto fail_per_node; | 1234 | goto fail_per_node; |
1255 | } | 1235 | } |
1256 | 1236 | ||
1257 | error = init_threads(sdp, DO); | ||
1258 | if (error) | ||
1259 | goto fail_per_node; | ||
1260 | |||
1261 | if (!(sb->s_flags & MS_RDONLY)) { | 1237 | if (!(sb->s_flags & MS_RDONLY)) { |
1262 | error = gfs2_make_fs_rw(sdp); | 1238 | error = gfs2_make_fs_rw(sdp); |
1263 | if (error) { | 1239 | if (error) { |
1264 | fs_err(sdp, "can't make FS RW: %d\n", error); | 1240 | fs_err(sdp, "can't make FS RW: %d\n", error); |
1265 | goto fail_threads; | 1241 | goto fail_per_node; |
1266 | } | 1242 | } |
1267 | } | 1243 | } |
1268 | 1244 | ||
@@ -1270,8 +1246,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1270 | gfs2_online_uevent(sdp); | 1246 | gfs2_online_uevent(sdp); |
1271 | return 0; | 1247 | return 0; |
1272 | 1248 | ||
1273 | fail_threads: | ||
1274 | init_threads(sdp, UNDO); | ||
1275 | fail_per_node: | 1249 | fail_per_node: |
1276 | init_per_node(sdp, UNDO); | 1250 | init_per_node(sdp, UNDO); |
1277 | fail_inodes: | 1251 | fail_inodes: |
@@ -1366,8 +1340,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, | |||
1366 | if (IS_ERR(s)) | 1340 | if (IS_ERR(s)) |
1367 | goto error_bdev; | 1341 | goto error_bdev; |
1368 | 1342 | ||
1369 | if (s->s_root) | 1343 | if (s->s_root) { |
1344 | /* | ||
1345 | * s_umount nests inside bd_mutex during | ||
1346 | * __invalidate_device(). blkdev_put() acquires | ||
1347 | * bd_mutex and can't be called under s_umount. Drop | ||
1348 | * s_umount temporarily. This is safe as we're | ||
1349 | * holding an active reference. | ||
1350 | */ | ||
1351 | up_write(&s->s_umount); | ||
1370 | blkdev_put(bdev, mode); | 1352 | blkdev_put(bdev, mode); |
1353 | down_write(&s->s_umount); | ||
1354 | } | ||
1371 | 1355 | ||
1372 | memset(&args, 0, sizeof(args)); | 1356 | memset(&args, 0, sizeof(args)); |
1373 | args.ar_quota = GFS2_QUOTA_DEFAULT; | 1357 | args.ar_quota = GFS2_QUOTA_DEFAULT; |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 98236d0df3ca..8bec0e3192dd 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -52,6 +52,11 @@ | |||
52 | #include <linux/dqblk_xfs.h> | 52 | #include <linux/dqblk_xfs.h> |
53 | #include <linux/lockref.h> | 53 | #include <linux/lockref.h> |
54 | #include <linux/list_lru.h> | 54 | #include <linux/list_lru.h> |
55 | #include <linux/rcupdate.h> | ||
56 | #include <linux/rculist_bl.h> | ||
57 | #include <linux/bit_spinlock.h> | ||
58 | #include <linux/jhash.h> | ||
59 | #include <linux/vmalloc.h> | ||
55 | 60 | ||
56 | #include "gfs2.h" | 61 | #include "gfs2.h" |
57 | #include "incore.h" | 62 | #include "incore.h" |
@@ -67,16 +72,44 @@ | |||
67 | #include "inode.h" | 72 | #include "inode.h" |
68 | #include "util.h" | 73 | #include "util.h" |
69 | 74 | ||
70 | struct gfs2_quota_change_host { | 75 | #define GFS2_QD_HASH_SHIFT 12 |
71 | u64 qc_change; | 76 | #define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT) |
72 | u32 qc_flags; /* GFS2_QCF_... */ | 77 | #define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) |
73 | struct kqid qc_id; | ||
74 | }; | ||
75 | 78 | ||
76 | /* Lock order: qd_lock -> qd->lockref.lock -> lru lock */ | 79 | /* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ |
80 | /* -> sd_bitmap_lock */ | ||
77 | static DEFINE_SPINLOCK(qd_lock); | 81 | static DEFINE_SPINLOCK(qd_lock); |
78 | struct list_lru gfs2_qd_lru; | 82 | struct list_lru gfs2_qd_lru; |
79 | 83 | ||
84 | static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE]; | ||
85 | |||
86 | static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp, | ||
87 | const struct kqid qid) | ||
88 | { | ||
89 | unsigned int h; | ||
90 | |||
91 | h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0); | ||
92 | h = jhash(&qid, sizeof(struct kqid), h); | ||
93 | |||
94 | return h & GFS2_QD_HASH_MASK; | ||
95 | } | ||
96 | |||
97 | static inline void spin_lock_bucket(unsigned int hash) | ||
98 | { | ||
99 | hlist_bl_lock(&qd_hash_table[hash]); | ||
100 | } | ||
101 | |||
102 | static inline void spin_unlock_bucket(unsigned int hash) | ||
103 | { | ||
104 | hlist_bl_unlock(&qd_hash_table[hash]); | ||
105 | } | ||
106 | |||
107 | static void gfs2_qd_dealloc(struct rcu_head *rcu) | ||
108 | { | ||
109 | struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu); | ||
110 | kmem_cache_free(gfs2_quotad_cachep, qd); | ||
111 | } | ||
112 | |||
80 | static void gfs2_qd_dispose(struct list_head *list) | 113 | static void gfs2_qd_dispose(struct list_head *list) |
81 | { | 114 | { |
82 | struct gfs2_quota_data *qd; | 115 | struct gfs2_quota_data *qd; |
@@ -93,6 +126,10 @@ static void gfs2_qd_dispose(struct list_head *list) | |||
93 | list_del(&qd->qd_list); | 126 | list_del(&qd->qd_list); |
94 | spin_unlock(&qd_lock); | 127 | spin_unlock(&qd_lock); |
95 | 128 | ||
129 | spin_lock_bucket(qd->qd_hash); | ||
130 | hlist_bl_del_rcu(&qd->qd_hlist); | ||
131 | spin_unlock_bucket(qd->qd_hash); | ||
132 | |||
96 | gfs2_assert_warn(sdp, !qd->qd_change); | 133 | gfs2_assert_warn(sdp, !qd->qd_change); |
97 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | 134 | gfs2_assert_warn(sdp, !qd->qd_slot_count); |
98 | gfs2_assert_warn(sdp, !qd->qd_bh_count); | 135 | gfs2_assert_warn(sdp, !qd->qd_bh_count); |
@@ -101,7 +138,7 @@ static void gfs2_qd_dispose(struct list_head *list) | |||
101 | atomic_dec(&sdp->sd_quota_count); | 138 | atomic_dec(&sdp->sd_quota_count); |
102 | 139 | ||
103 | /* Delete it from the common reclaim list */ | 140 | /* Delete it from the common reclaim list */ |
104 | kmem_cache_free(gfs2_quotad_cachep, qd); | 141 | call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); |
105 | } | 142 | } |
106 | } | 143 | } |
107 | 144 | ||
@@ -171,83 +208,95 @@ static u64 qd2offset(struct gfs2_quota_data *qd) | |||
171 | return offset; | 208 | return offset; |
172 | } | 209 | } |
173 | 210 | ||
174 | static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, | 211 | static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid) |
175 | struct gfs2_quota_data **qdp) | ||
176 | { | 212 | { |
177 | struct gfs2_quota_data *qd; | 213 | struct gfs2_quota_data *qd; |
178 | int error; | 214 | int error; |
179 | 215 | ||
180 | qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); | 216 | qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); |
181 | if (!qd) | 217 | if (!qd) |
182 | return -ENOMEM; | 218 | return NULL; |
183 | 219 | ||
220 | qd->qd_sbd = sdp; | ||
184 | qd->qd_lockref.count = 1; | 221 | qd->qd_lockref.count = 1; |
185 | spin_lock_init(&qd->qd_lockref.lock); | 222 | spin_lock_init(&qd->qd_lockref.lock); |
186 | qd->qd_id = qid; | 223 | qd->qd_id = qid; |
187 | qd->qd_slot = -1; | 224 | qd->qd_slot = -1; |
188 | INIT_LIST_HEAD(&qd->qd_lru); | 225 | INIT_LIST_HEAD(&qd->qd_lru); |
226 | qd->qd_hash = hash; | ||
189 | 227 | ||
190 | error = gfs2_glock_get(sdp, qd2index(qd), | 228 | error = gfs2_glock_get(sdp, qd2index(qd), |
191 | &gfs2_quota_glops, CREATE, &qd->qd_gl); | 229 | &gfs2_quota_glops, CREATE, &qd->qd_gl); |
192 | if (error) | 230 | if (error) |
193 | goto fail; | 231 | goto fail; |
194 | 232 | ||
195 | *qdp = qd; | 233 | return qd; |
196 | |||
197 | return 0; | ||
198 | 234 | ||
199 | fail: | 235 | fail: |
200 | kmem_cache_free(gfs2_quotad_cachep, qd); | 236 | kmem_cache_free(gfs2_quotad_cachep, qd); |
201 | return error; | 237 | return NULL; |
202 | } | 238 | } |
203 | 239 | ||
204 | static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, | 240 | static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash, |
205 | struct gfs2_quota_data **qdp) | 241 | const struct gfs2_sbd *sdp, |
242 | struct kqid qid) | ||
206 | { | 243 | { |
207 | struct gfs2_quota_data *qd = NULL, *new_qd = NULL; | 244 | struct gfs2_quota_data *qd; |
208 | int error, found; | 245 | struct hlist_bl_node *h; |
209 | |||
210 | *qdp = NULL; | ||
211 | 246 | ||
212 | for (;;) { | 247 | hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) { |
213 | found = 0; | 248 | if (!qid_eq(qd->qd_id, qid)) |
214 | spin_lock(&qd_lock); | 249 | continue; |
215 | list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { | 250 | if (qd->qd_sbd != sdp) |
216 | if (qid_eq(qd->qd_id, qid) && | 251 | continue; |
217 | lockref_get_not_dead(&qd->qd_lockref)) { | 252 | if (lockref_get_not_dead(&qd->qd_lockref)) { |
218 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); | 253 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); |
219 | found = 1; | 254 | return qd; |
220 | break; | ||
221 | } | ||
222 | } | 255 | } |
256 | } | ||
223 | 257 | ||
224 | if (!found) | 258 | return NULL; |
225 | qd = NULL; | 259 | } |
226 | 260 | ||
227 | if (!qd && new_qd) { | ||
228 | qd = new_qd; | ||
229 | list_add(&qd->qd_list, &sdp->sd_quota_list); | ||
230 | atomic_inc(&sdp->sd_quota_count); | ||
231 | new_qd = NULL; | ||
232 | } | ||
233 | 261 | ||
234 | spin_unlock(&qd_lock); | 262 | static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, |
263 | struct gfs2_quota_data **qdp) | ||
264 | { | ||
265 | struct gfs2_quota_data *qd, *new_qd; | ||
266 | unsigned int hash = gfs2_qd_hash(sdp, qid); | ||
235 | 267 | ||
236 | if (qd) { | 268 | rcu_read_lock(); |
237 | if (new_qd) { | 269 | *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); |
238 | gfs2_glock_put(new_qd->qd_gl); | 270 | rcu_read_unlock(); |
239 | kmem_cache_free(gfs2_quotad_cachep, new_qd); | ||
240 | } | ||
241 | *qdp = qd; | ||
242 | return 0; | ||
243 | } | ||
244 | 271 | ||
245 | error = qd_alloc(sdp, qid, &new_qd); | 272 | if (qd) |
246 | if (error) | 273 | return 0; |
247 | return error; | 274 | |
275 | new_qd = qd_alloc(hash, sdp, qid); | ||
276 | if (!new_qd) | ||
277 | return -ENOMEM; | ||
278 | |||
279 | spin_lock(&qd_lock); | ||
280 | spin_lock_bucket(hash); | ||
281 | *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); | ||
282 | if (qd == NULL) { | ||
283 | *qdp = new_qd; | ||
284 | list_add(&new_qd->qd_list, &sdp->sd_quota_list); | ||
285 | hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); | ||
286 | atomic_inc(&sdp->sd_quota_count); | ||
248 | } | 287 | } |
288 | spin_unlock_bucket(hash); | ||
289 | spin_unlock(&qd_lock); | ||
290 | |||
291 | if (qd) { | ||
292 | gfs2_glock_put(new_qd->qd_gl); | ||
293 | kmem_cache_free(gfs2_quotad_cachep, new_qd); | ||
294 | } | ||
295 | |||
296 | return 0; | ||
249 | } | 297 | } |
250 | 298 | ||
299 | |||
251 | static void qd_hold(struct gfs2_quota_data *qd) | 300 | static void qd_hold(struct gfs2_quota_data *qd) |
252 | { | 301 | { |
253 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 302 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; |
@@ -268,88 +317,48 @@ static void qd_put(struct gfs2_quota_data *qd) | |||
268 | 317 | ||
269 | static int slot_get(struct gfs2_quota_data *qd) | 318 | static int slot_get(struct gfs2_quota_data *qd) |
270 | { | 319 | { |
271 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 320 | struct gfs2_sbd *sdp = qd->qd_sbd; |
272 | unsigned int c, o = 0, b; | 321 | unsigned int bit; |
273 | unsigned char byte = 0; | 322 | int error = 0; |
274 | 323 | ||
275 | spin_lock(&qd_lock); | 324 | spin_lock(&sdp->sd_bitmap_lock); |
325 | if (qd->qd_slot_count != 0) | ||
326 | goto out; | ||
276 | 327 | ||
277 | if (qd->qd_slot_count++) { | 328 | error = -ENOSPC; |
278 | spin_unlock(&qd_lock); | 329 | bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots); |
279 | return 0; | 330 | if (bit < sdp->sd_quota_slots) { |
331 | set_bit(bit, sdp->sd_quota_bitmap); | ||
332 | qd->qd_slot = bit; | ||
333 | out: | ||
334 | qd->qd_slot_count++; | ||
280 | } | 335 | } |
336 | spin_unlock(&sdp->sd_bitmap_lock); | ||
281 | 337 | ||
282 | for (c = 0; c < sdp->sd_quota_chunks; c++) | 338 | return error; |
283 | for (o = 0; o < PAGE_SIZE; o++) { | ||
284 | byte = sdp->sd_quota_bitmap[c][o]; | ||
285 | if (byte != 0xFF) | ||
286 | goto found; | ||
287 | } | ||
288 | |||
289 | goto fail; | ||
290 | |||
291 | found: | ||
292 | for (b = 0; b < 8; b++) | ||
293 | if (!(byte & (1 << b))) | ||
294 | break; | ||
295 | qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b; | ||
296 | |||
297 | if (qd->qd_slot >= sdp->sd_quota_slots) | ||
298 | goto fail; | ||
299 | |||
300 | sdp->sd_quota_bitmap[c][o] |= 1 << b; | ||
301 | |||
302 | spin_unlock(&qd_lock); | ||
303 | |||
304 | return 0; | ||
305 | |||
306 | fail: | ||
307 | qd->qd_slot_count--; | ||
308 | spin_unlock(&qd_lock); | ||
309 | return -ENOSPC; | ||
310 | } | 339 | } |
311 | 340 | ||
312 | static void slot_hold(struct gfs2_quota_data *qd) | 341 | static void slot_hold(struct gfs2_quota_data *qd) |
313 | { | 342 | { |
314 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 343 | struct gfs2_sbd *sdp = qd->qd_sbd; |
315 | 344 | ||
316 | spin_lock(&qd_lock); | 345 | spin_lock(&sdp->sd_bitmap_lock); |
317 | gfs2_assert(sdp, qd->qd_slot_count); | 346 | gfs2_assert(sdp, qd->qd_slot_count); |
318 | qd->qd_slot_count++; | 347 | qd->qd_slot_count++; |
319 | spin_unlock(&qd_lock); | 348 | spin_unlock(&sdp->sd_bitmap_lock); |
320 | } | ||
321 | |||
322 | static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | ||
323 | unsigned int bit, int new_value) | ||
324 | { | ||
325 | unsigned int c, o, b = bit; | ||
326 | int old_value; | ||
327 | |||
328 | c = b / (8 * PAGE_SIZE); | ||
329 | b %= 8 * PAGE_SIZE; | ||
330 | o = b / 8; | ||
331 | b %= 8; | ||
332 | |||
333 | old_value = (bitmap[c][o] & (1 << b)); | ||
334 | gfs2_assert_withdraw(sdp, !old_value != !new_value); | ||
335 | |||
336 | if (new_value) | ||
337 | bitmap[c][o] |= 1 << b; | ||
338 | else | ||
339 | bitmap[c][o] &= ~(1 << b); | ||
340 | } | 349 | } |
341 | 350 | ||
342 | static void slot_put(struct gfs2_quota_data *qd) | 351 | static void slot_put(struct gfs2_quota_data *qd) |
343 | { | 352 | { |
344 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 353 | struct gfs2_sbd *sdp = qd->qd_sbd; |
345 | 354 | ||
346 | spin_lock(&qd_lock); | 355 | spin_lock(&sdp->sd_bitmap_lock); |
347 | gfs2_assert(sdp, qd->qd_slot_count); | 356 | gfs2_assert(sdp, qd->qd_slot_count); |
348 | if (!--qd->qd_slot_count) { | 357 | if (!--qd->qd_slot_count) { |
349 | gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); | 358 | BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap)); |
350 | qd->qd_slot = -1; | 359 | qd->qd_slot = -1; |
351 | } | 360 | } |
352 | spin_unlock(&qd_lock); | 361 | spin_unlock(&sdp->sd_bitmap_lock); |
353 | } | 362 | } |
354 | 363 | ||
355 | static int bh_get(struct gfs2_quota_data *qd) | 364 | static int bh_get(struct gfs2_quota_data *qd) |
@@ -427,8 +436,7 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, | |||
427 | list_move_tail(&qd->qd_list, &sdp->sd_quota_list); | 436 | list_move_tail(&qd->qd_list, &sdp->sd_quota_list); |
428 | set_bit(QDF_LOCKED, &qd->qd_flags); | 437 | set_bit(QDF_LOCKED, &qd->qd_flags); |
429 | qd->qd_change_sync = qd->qd_change; | 438 | qd->qd_change_sync = qd->qd_change; |
430 | gfs2_assert_warn(sdp, qd->qd_slot_count); | 439 | slot_hold(qd); |
431 | qd->qd_slot_count++; | ||
432 | return 1; | 440 | return 1; |
433 | } | 441 | } |
434 | 442 | ||
@@ -1214,17 +1222,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) | |||
1214 | return error; | 1222 | return error; |
1215 | } | 1223 | } |
1216 | 1224 | ||
1217 | static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) | ||
1218 | { | ||
1219 | const struct gfs2_quota_change *str = buf; | ||
1220 | |||
1221 | qc->qc_change = be64_to_cpu(str->qc_change); | ||
1222 | qc->qc_flags = be32_to_cpu(str->qc_flags); | ||
1223 | qc->qc_id = make_kqid(&init_user_ns, | ||
1224 | (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA, | ||
1225 | be32_to_cpu(str->qc_id)); | ||
1226 | } | ||
1227 | |||
1228 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1225 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
1229 | { | 1226 | { |
1230 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1227 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
@@ -1232,6 +1229,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1232 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | 1229 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; |
1233 | unsigned int x, slot = 0; | 1230 | unsigned int x, slot = 0; |
1234 | unsigned int found = 0; | 1231 | unsigned int found = 0; |
1232 | unsigned int hash; | ||
1233 | unsigned int bm_size; | ||
1235 | u64 dblock; | 1234 | u64 dblock; |
1236 | u32 extlen = 0; | 1235 | u32 extlen = 0; |
1237 | int error; | 1236 | int error; |
@@ -1240,23 +1239,20 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1240 | return -EIO; | 1239 | return -EIO; |
1241 | 1240 | ||
1242 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1241 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
1243 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1242 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); |
1244 | 1243 | bm_size *= sizeof(unsigned long); | |
1245 | error = -ENOMEM; | 1244 | error = -ENOMEM; |
1246 | 1245 | sdp->sd_quota_bitmap = kmalloc(bm_size, GFP_NOFS|__GFP_NOWARN); | |
1247 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, | 1246 | if (sdp->sd_quota_bitmap == NULL) |
1248 | sizeof(unsigned char *), GFP_NOFS); | 1247 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS, PAGE_KERNEL); |
1249 | if (!sdp->sd_quota_bitmap) | 1248 | if (!sdp->sd_quota_bitmap) |
1250 | return error; | 1249 | return error; |
1251 | 1250 | ||
1252 | for (x = 0; x < sdp->sd_quota_chunks; x++) { | 1251 | memset(sdp->sd_quota_bitmap, 0, bm_size); |
1253 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); | ||
1254 | if (!sdp->sd_quota_bitmap[x]) | ||
1255 | goto fail; | ||
1256 | } | ||
1257 | 1252 | ||
1258 | for (x = 0; x < blocks; x++) { | 1253 | for (x = 0; x < blocks; x++) { |
1259 | struct buffer_head *bh; | 1254 | struct buffer_head *bh; |
1255 | const struct gfs2_quota_change *qc; | ||
1260 | unsigned int y; | 1256 | unsigned int y; |
1261 | 1257 | ||
1262 | if (!extlen) { | 1258 | if (!extlen) { |
@@ -1274,34 +1270,42 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1274 | goto fail; | 1270 | goto fail; |
1275 | } | 1271 | } |
1276 | 1272 | ||
1273 | qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); | ||
1277 | for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; | 1274 | for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; |
1278 | y++, slot++) { | 1275 | y++, slot++) { |
1279 | struct gfs2_quota_change_host qc; | ||
1280 | struct gfs2_quota_data *qd; | 1276 | struct gfs2_quota_data *qd; |
1281 | 1277 | s64 qc_change = be64_to_cpu(qc->qc_change); | |
1282 | gfs2_quota_change_in(&qc, bh->b_data + | 1278 | u32 qc_flags = be32_to_cpu(qc->qc_flags); |
1283 | sizeof(struct gfs2_meta_header) + | 1279 | enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? |
1284 | y * sizeof(struct gfs2_quota_change)); | 1280 | USRQUOTA : GRPQUOTA; |
1285 | if (!qc.qc_change) | 1281 | struct kqid qc_id = make_kqid(&init_user_ns, qtype, |
1282 | be32_to_cpu(qc->qc_id)); | ||
1283 | qc++; | ||
1284 | if (!qc_change) | ||
1286 | continue; | 1285 | continue; |
1287 | 1286 | ||
1288 | error = qd_alloc(sdp, qc.qc_id, &qd); | 1287 | hash = gfs2_qd_hash(sdp, qc_id); |
1289 | if (error) { | 1288 | qd = qd_alloc(hash, sdp, qc_id); |
1289 | if (qd == NULL) { | ||
1290 | brelse(bh); | 1290 | brelse(bh); |
1291 | goto fail; | 1291 | goto fail; |
1292 | } | 1292 | } |
1293 | 1293 | ||
1294 | set_bit(QDF_CHANGE, &qd->qd_flags); | 1294 | set_bit(QDF_CHANGE, &qd->qd_flags); |
1295 | qd->qd_change = qc.qc_change; | 1295 | qd->qd_change = qc_change; |
1296 | qd->qd_slot = slot; | 1296 | qd->qd_slot = slot; |
1297 | qd->qd_slot_count = 1; | 1297 | qd->qd_slot_count = 1; |
1298 | 1298 | ||
1299 | spin_lock(&qd_lock); | 1299 | spin_lock(&qd_lock); |
1300 | gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); | 1300 | BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); |
1301 | list_add(&qd->qd_list, &sdp->sd_quota_list); | 1301 | list_add(&qd->qd_list, &sdp->sd_quota_list); |
1302 | atomic_inc(&sdp->sd_quota_count); | 1302 | atomic_inc(&sdp->sd_quota_count); |
1303 | spin_unlock(&qd_lock); | 1303 | spin_unlock(&qd_lock); |
1304 | 1304 | ||
1305 | spin_lock_bucket(hash); | ||
1306 | hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); | ||
1307 | spin_unlock_bucket(hash); | ||
1308 | |||
1305 | found++; | 1309 | found++; |
1306 | } | 1310 | } |
1307 | 1311 | ||
@@ -1324,44 +1328,28 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) | |||
1324 | { | 1328 | { |
1325 | struct list_head *head = &sdp->sd_quota_list; | 1329 | struct list_head *head = &sdp->sd_quota_list; |
1326 | struct gfs2_quota_data *qd; | 1330 | struct gfs2_quota_data *qd; |
1327 | unsigned int x; | ||
1328 | 1331 | ||
1329 | spin_lock(&qd_lock); | 1332 | spin_lock(&qd_lock); |
1330 | while (!list_empty(head)) { | 1333 | while (!list_empty(head)) { |
1331 | qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); | 1334 | qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); |
1332 | 1335 | ||
1333 | /* | ||
1334 | * To be removed in due course... we should be able to | ||
1335 | * ensure that all refs to the qd have done by this point | ||
1336 | * so that this rather odd test is not required | ||
1337 | */ | ||
1338 | spin_lock(&qd->qd_lockref.lock); | ||
1339 | if (qd->qd_lockref.count > 1 || | ||
1340 | (qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { | ||
1341 | spin_unlock(&qd->qd_lockref.lock); | ||
1342 | list_move(&qd->qd_list, head); | ||
1343 | spin_unlock(&qd_lock); | ||
1344 | schedule(); | ||
1345 | spin_lock(&qd_lock); | ||
1346 | continue; | ||
1347 | } | ||
1348 | spin_unlock(&qd->qd_lockref.lock); | ||
1349 | |||
1350 | list_del(&qd->qd_list); | 1336 | list_del(&qd->qd_list); |
1337 | |||
1351 | /* Also remove if this qd exists in the reclaim list */ | 1338 | /* Also remove if this qd exists in the reclaim list */ |
1352 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); | 1339 | list_lru_del(&gfs2_qd_lru, &qd->qd_lru); |
1353 | atomic_dec(&sdp->sd_quota_count); | 1340 | atomic_dec(&sdp->sd_quota_count); |
1354 | spin_unlock(&qd_lock); | 1341 | spin_unlock(&qd_lock); |
1355 | 1342 | ||
1356 | if (!qd->qd_lockref.count) { | 1343 | spin_lock_bucket(qd->qd_hash); |
1357 | gfs2_assert_warn(sdp, !qd->qd_change); | 1344 | hlist_bl_del_rcu(&qd->qd_hlist); |
1358 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | 1345 | spin_unlock_bucket(qd->qd_hash); |
1359 | } else | 1346 | |
1360 | gfs2_assert_warn(sdp, qd->qd_slot_count == 1); | 1347 | gfs2_assert_warn(sdp, !qd->qd_change); |
1348 | gfs2_assert_warn(sdp, !qd->qd_slot_count); | ||
1361 | gfs2_assert_warn(sdp, !qd->qd_bh_count); | 1349 | gfs2_assert_warn(sdp, !qd->qd_bh_count); |
1362 | 1350 | ||
1363 | gfs2_glock_put(qd->qd_gl); | 1351 | gfs2_glock_put(qd->qd_gl); |
1364 | kmem_cache_free(gfs2_quotad_cachep, qd); | 1352 | call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); |
1365 | 1353 | ||
1366 | spin_lock(&qd_lock); | 1354 | spin_lock(&qd_lock); |
1367 | } | 1355 | } |
@@ -1370,9 +1358,11 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) | |||
1370 | gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); | 1358 | gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); |
1371 | 1359 | ||
1372 | if (sdp->sd_quota_bitmap) { | 1360 | if (sdp->sd_quota_bitmap) { |
1373 | for (x = 0; x < sdp->sd_quota_chunks; x++) | 1361 | if (is_vmalloc_addr(sdp->sd_quota_bitmap)) |
1374 | kfree(sdp->sd_quota_bitmap[x]); | 1362 | vfree(sdp->sd_quota_bitmap); |
1375 | kfree(sdp->sd_quota_bitmap); | 1363 | else |
1364 | kfree(sdp->sd_quota_bitmap); | ||
1365 | sdp->sd_quota_bitmap = NULL; | ||
1376 | } | 1366 | } |
1377 | } | 1367 | } |
1378 | 1368 | ||
@@ -1656,3 +1646,11 @@ const struct quotactl_ops gfs2_quotactl_ops = { | |||
1656 | .get_dqblk = gfs2_get_dqblk, | 1646 | .get_dqblk = gfs2_get_dqblk, |
1657 | .set_dqblk = gfs2_set_dqblk, | 1647 | .set_dqblk = gfs2_set_dqblk, |
1658 | }; | 1648 | }; |
1649 | |||
1650 | void __init gfs2_quota_hash_init(void) | ||
1651 | { | ||
1652 | unsigned i; | ||
1653 | |||
1654 | for(i = 0; i < GFS2_QD_HASH_SIZE; i++) | ||
1655 | INIT_HLIST_BL_HEAD(&qd_hash_table[i]); | ||
1656 | } | ||
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 96e4f34a03b0..55d506eb3c4a 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -57,5 +57,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
57 | extern const struct quotactl_ops gfs2_quotactl_ops; | 57 | extern const struct quotactl_ops gfs2_quotactl_ops; |
58 | extern struct shrinker gfs2_qd_shrinker; | 58 | extern struct shrinker gfs2_qd_shrinker; |
59 | extern struct list_lru gfs2_qd_lru; | 59 | extern struct list_lru gfs2_qd_lru; |
60 | extern void __init gfs2_quota_hash_init(void); | ||
60 | 61 | ||
61 | #endif /* __QUOTA_DOT_H__ */ | 62 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c8d6161bd682..a1da21349235 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -57,6 +57,11 @@ | |||
57 | * 3 = Used (metadata) | 57 | * 3 = Used (metadata) |
58 | */ | 58 | */ |
59 | 59 | ||
60 | struct gfs2_extent { | ||
61 | struct gfs2_rbm rbm; | ||
62 | u32 len; | ||
63 | }; | ||
64 | |||
60 | static const char valid_change[16] = { | 65 | static const char valid_change[16] = { |
61 | /* current */ | 66 | /* current */ |
62 | /* n */ 0, 1, 1, 1, | 67 | /* n */ 0, 1, 1, 1, |
@@ -65,8 +70,9 @@ static const char valid_change[16] = { | |||
65 | 1, 0, 0, 0 | 70 | 1, 0, 0, 0 |
66 | }; | 71 | }; |
67 | 72 | ||
68 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | 73 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, |
69 | const struct gfs2_inode *ip, bool nowrap); | 74 | const struct gfs2_inode *ip, bool nowrap, |
75 | const struct gfs2_alloc_parms *ap); | ||
70 | 76 | ||
71 | 77 | ||
72 | /** | 78 | /** |
@@ -635,9 +641,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) | |||
635 | /* return reserved blocks to the rgrp */ | 641 | /* return reserved blocks to the rgrp */ |
636 | BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); | 642 | BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); |
637 | rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; | 643 | rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; |
644 | /* The rgrp extent failure point is likely not to increase; | ||
645 | it will only do so if the freed blocks are somehow | ||
646 | contiguous with a span of free blocks that follows. Still, | ||
647 | it will force the number to be recalculated later. */ | ||
648 | rgd->rd_extfail_pt += rs->rs_free; | ||
638 | rs->rs_free = 0; | 649 | rs->rs_free = 0; |
639 | clear_bit(GBF_FULL, &bi->bi_flags); | 650 | clear_bit(GBF_FULL, &bi->bi_flags); |
640 | smp_mb__after_clear_bit(); | ||
641 | } | 651 | } |
642 | } | 652 | } |
643 | 653 | ||
@@ -876,6 +886,7 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) | |||
876 | static int read_rindex_entry(struct gfs2_inode *ip) | 886 | static int read_rindex_entry(struct gfs2_inode *ip) |
877 | { | 887 | { |
878 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 888 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
889 | const unsigned bsize = sdp->sd_sb.sb_bsize; | ||
879 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); | 890 | loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); |
880 | struct gfs2_rindex buf; | 891 | struct gfs2_rindex buf; |
881 | int error; | 892 | int error; |
@@ -913,6 +924,8 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
913 | goto fail; | 924 | goto fail; |
914 | 925 | ||
915 | rgd->rd_gl->gl_object = rgd; | 926 | rgd->rd_gl->gl_object = rgd; |
927 | rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; | ||
928 | rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; | ||
916 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; | 929 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; |
917 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 930 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
918 | if (rgd->rd_data > sdp->sd_max_rg_data) | 931 | if (rgd->rd_data > sdp->sd_max_rg_data) |
@@ -1126,6 +1139,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
1126 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); | 1139 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 1140 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
1128 | rgd->rd_free_clone = rgd->rd_free; | 1141 | rgd->rd_free_clone = rgd->rd_free; |
1142 | /* max out the rgrp allocation failure point */ | ||
1143 | rgd->rd_extfail_pt = rgd->rd_free; | ||
1129 | } | 1144 | } |
1130 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { | 1145 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { |
1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); | 1146 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); |
@@ -1184,7 +1199,7 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | |||
1184 | 1199 | ||
1185 | if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) | 1200 | if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) |
1186 | return 0; | 1201 | return 0; |
1187 | return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); | 1202 | return gfs2_rgrp_bh_get(rgd); |
1188 | } | 1203 | } |
1189 | 1204 | ||
1190 | /** | 1205 | /** |
@@ -1455,7 +1470,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, | |||
1455 | if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) | 1470 | if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) |
1456 | return; | 1471 | return; |
1457 | 1472 | ||
1458 | ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); | 1473 | ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); |
1459 | if (ret == 0) { | 1474 | if (ret == 0) { |
1460 | rs->rs_rbm = rbm; | 1475 | rs->rs_rbm = rbm; |
1461 | rs->rs_free = extlen; | 1476 | rs->rs_free = extlen; |
@@ -1520,6 +1535,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, | |||
1520 | * @rbm: The current position in the resource group | 1535 | * @rbm: The current position in the resource group |
1521 | * @ip: The inode for which we are searching for blocks | 1536 | * @ip: The inode for which we are searching for blocks |
1522 | * @minext: The minimum extent length | 1537 | * @minext: The minimum extent length |
1538 | * @maxext: A pointer to the maximum extent structure | ||
1523 | * | 1539 | * |
1524 | * This checks the current position in the rgrp to see whether there is | 1540 | * This checks the current position in the rgrp to see whether there is |
1525 | * a reservation covering this block. If not then this function is a | 1541 | * a reservation covering this block. If not then this function is a |
@@ -1532,7 +1548,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, | |||
1532 | 1548 | ||
1533 | static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | 1549 | static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, |
1534 | const struct gfs2_inode *ip, | 1550 | const struct gfs2_inode *ip, |
1535 | u32 minext) | 1551 | u32 minext, |
1552 | struct gfs2_extent *maxext) | ||
1536 | { | 1553 | { |
1537 | u64 block = gfs2_rbm_to_block(rbm); | 1554 | u64 block = gfs2_rbm_to_block(rbm); |
1538 | u32 extlen = 1; | 1555 | u32 extlen = 1; |
@@ -1545,8 +1562,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | |||
1545 | */ | 1562 | */ |
1546 | if (minext) { | 1563 | if (minext) { |
1547 | extlen = gfs2_free_extlen(rbm, minext); | 1564 | extlen = gfs2_free_extlen(rbm, minext); |
1548 | nblock = block + extlen; | 1565 | if (extlen <= maxext->len) |
1549 | if (extlen < minext) | ||
1550 | goto fail; | 1566 | goto fail; |
1551 | } | 1567 | } |
1552 | 1568 | ||
@@ -1555,9 +1571,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, | |||
1555 | * and skip if parts of it are already reserved | 1571 | * and skip if parts of it are already reserved |
1556 | */ | 1572 | */ |
1557 | nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); | 1573 | nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); |
1558 | if (nblock == block) | 1574 | if (nblock == block) { |
1559 | return 0; | 1575 | if (!minext || extlen >= minext) |
1576 | return 0; | ||
1577 | |||
1578 | if (extlen > maxext->len) { | ||
1579 | maxext->len = extlen; | ||
1580 | maxext->rbm = *rbm; | ||
1581 | } | ||
1560 | fail: | 1582 | fail: |
1583 | nblock = block + extlen; | ||
1584 | } | ||
1561 | ret = gfs2_rbm_from_block(rbm, nblock); | 1585 | ret = gfs2_rbm_from_block(rbm, nblock); |
1562 | if (ret < 0) | 1586 | if (ret < 0) |
1563 | return ret; | 1587 | return ret; |
@@ -1568,30 +1592,38 @@ fail: | |||
1568 | * gfs2_rbm_find - Look for blocks of a particular state | 1592 | * gfs2_rbm_find - Look for blocks of a particular state |
1569 | * @rbm: Value/result starting position and final position | 1593 | * @rbm: Value/result starting position and final position |
1570 | * @state: The state which we want to find | 1594 | * @state: The state which we want to find |
1571 | * @minext: The requested extent length (0 for a single block) | 1595 | * @minext: Pointer to the requested extent length (NULL for a single block) |
1596 | * This is updated to be the actual reservation size. | ||
1572 | * @ip: If set, check for reservations | 1597 | * @ip: If set, check for reservations |
1573 | * @nowrap: Stop looking at the end of the rgrp, rather than wrapping | 1598 | * @nowrap: Stop looking at the end of the rgrp, rather than wrapping |
1574 | * around until we've reached the starting point. | 1599 | * around until we've reached the starting point. |
1600 | * @ap: the allocation parameters | ||
1575 | * | 1601 | * |
1576 | * Side effects: | 1602 | * Side effects: |
1577 | * - If looking for free blocks, we set GBF_FULL on each bitmap which | 1603 | * - If looking for free blocks, we set GBF_FULL on each bitmap which |
1578 | * has no free blocks in it. | 1604 | * has no free blocks in it. |
1605 | * - If looking for free blocks, we set rd_extfail_pt on each rgrp which | ||
1606 | * has come up short on a free block search. | ||
1579 | * | 1607 | * |
1580 | * Returns: 0 on success, -ENOSPC if there is no block of the requested state | 1608 | * Returns: 0 on success, -ENOSPC if there is no block of the requested state |
1581 | */ | 1609 | */ |
1582 | 1610 | ||
1583 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | 1611 | static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, |
1584 | const struct gfs2_inode *ip, bool nowrap) | 1612 | const struct gfs2_inode *ip, bool nowrap, |
1613 | const struct gfs2_alloc_parms *ap) | ||
1585 | { | 1614 | { |
1586 | struct buffer_head *bh; | 1615 | struct buffer_head *bh; |
1587 | int initial_bii; | 1616 | int initial_bii; |
1588 | u32 initial_offset; | 1617 | u32 initial_offset; |
1618 | int first_bii = rbm->bii; | ||
1619 | u32 first_offset = rbm->offset; | ||
1589 | u32 offset; | 1620 | u32 offset; |
1590 | u8 *buffer; | 1621 | u8 *buffer; |
1591 | int n = 0; | 1622 | int n = 0; |
1592 | int iters = rbm->rgd->rd_length; | 1623 | int iters = rbm->rgd->rd_length; |
1593 | int ret; | 1624 | int ret; |
1594 | struct gfs2_bitmap *bi; | 1625 | struct gfs2_bitmap *bi; |
1626 | struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; | ||
1595 | 1627 | ||
1596 | /* If we are not starting at the beginning of a bitmap, then we | 1628 | /* If we are not starting at the beginning of a bitmap, then we |
1597 | * need to add one to the bitmap count to ensure that we search | 1629 | * need to add one to the bitmap count to ensure that we search |
@@ -1620,7 +1652,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, | |||
1620 | return 0; | 1652 | return 0; |
1621 | 1653 | ||
1622 | initial_bii = rbm->bii; | 1654 | initial_bii = rbm->bii; |
1623 | ret = gfs2_reservation_check_and_update(rbm, ip, minext); | 1655 | ret = gfs2_reservation_check_and_update(rbm, ip, |
1656 | minext ? *minext : 0, | ||
1657 | &maxext); | ||
1624 | if (ret == 0) | 1658 | if (ret == 0) |
1625 | return 0; | 1659 | return 0; |
1626 | if (ret > 0) { | 1660 | if (ret > 0) { |
@@ -1655,6 +1689,24 @@ next_iter: | |||
1655 | break; | 1689 | break; |
1656 | } | 1690 | } |
1657 | 1691 | ||
1692 | if (minext == NULL || state != GFS2_BLKST_FREE) | ||
1693 | return -ENOSPC; | ||
1694 | |||
1695 | /* If the extent was too small, and it's smaller than the smallest | ||
1696 | to have failed before, remember for future reference that it's | ||
1697 | useless to search this rgrp again for this amount or more. */ | ||
1698 | if ((first_offset == 0) && (first_bii == 0) && | ||
1699 | (*minext < rbm->rgd->rd_extfail_pt)) | ||
1700 | rbm->rgd->rd_extfail_pt = *minext; | ||
1701 | |||
1702 | /* If the maximum extent we found is big enough to fulfill the | ||
1703 | minimum requirements, use it anyway. */ | ||
1704 | if (maxext.len) { | ||
1705 | *rbm = maxext.rbm; | ||
1706 | *minext = maxext.len; | ||
1707 | return 0; | ||
1708 | } | ||
1709 | |||
1658 | return -ENOSPC; | 1710 | return -ENOSPC; |
1659 | } | 1711 | } |
1660 | 1712 | ||
@@ -1680,7 +1732,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1680 | 1732 | ||
1681 | while (1) { | 1733 | while (1) { |
1682 | down_write(&sdp->sd_log_flush_lock); | 1734 | down_write(&sdp->sd_log_flush_lock); |
1683 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); | 1735 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, |
1736 | true, NULL); | ||
1684 | up_write(&sdp->sd_log_flush_lock); | 1737 | up_write(&sdp->sd_log_flush_lock); |
1685 | if (error == -ENOSPC) | 1738 | if (error == -ENOSPC) |
1686 | break; | 1739 | break; |
@@ -1891,7 +1944,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
1891 | } | 1944 | } |
1892 | 1945 | ||
1893 | /* Skip unuseable resource groups */ | 1946 | /* Skip unuseable resource groups */ |
1894 | if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) | 1947 | if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | |
1948 | GFS2_RDF_ERROR)) || | ||
1949 | (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) | ||
1895 | goto skip_rgrp; | 1950 | goto skip_rgrp; |
1896 | 1951 | ||
1897 | if (sdp->sd_args.ar_rgrplvb) | 1952 | if (sdp->sd_args.ar_rgrplvb) |
@@ -1911,15 +1966,16 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
1911 | return 0; | 1966 | return 0; |
1912 | } | 1967 | } |
1913 | 1968 | ||
1914 | /* Drop reservation, if we couldn't use reserved rgrp */ | ||
1915 | if (gfs2_rs_active(rs)) | ||
1916 | gfs2_rs_deltree(rs); | ||
1917 | check_rgrp: | 1969 | check_rgrp: |
1918 | /* Check for unlinked inodes which can be reclaimed */ | 1970 | /* Check for unlinked inodes which can be reclaimed */ |
1919 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) | 1971 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) |
1920 | try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, | 1972 | try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, |
1921 | ip->i_no_addr); | 1973 | ip->i_no_addr); |
1922 | skip_rgrp: | 1974 | skip_rgrp: |
1975 | /* Drop reservation, if we couldn't use reserved rgrp */ | ||
1976 | if (gfs2_rs_active(rs)) | ||
1977 | gfs2_rs_deltree(rs); | ||
1978 | |||
1923 | /* Unlock rgrp if required */ | 1979 | /* Unlock rgrp if required */ |
1924 | if (!rg_locked) | 1980 | if (!rg_locked) |
1925 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1981 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
@@ -2064,25 +2120,24 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
2064 | * | 2120 | * |
2065 | */ | 2121 | */ |
2066 | 2122 | ||
2067 | int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) | 2123 | void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) |
2068 | { | 2124 | { |
2069 | struct gfs2_rgrpd *rgd = gl->gl_object; | 2125 | struct gfs2_rgrpd *rgd = gl->gl_object; |
2070 | struct gfs2_blkreserv *trs; | 2126 | struct gfs2_blkreserv *trs; |
2071 | const struct rb_node *n; | 2127 | const struct rb_node *n; |
2072 | 2128 | ||
2073 | if (rgd == NULL) | 2129 | if (rgd == NULL) |
2074 | return 0; | 2130 | return; |
2075 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", | 2131 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", |
2076 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, | 2132 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, |
2077 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, | 2133 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, |
2078 | rgd->rd_reserved); | 2134 | rgd->rd_reserved, rgd->rd_extfail_pt); |
2079 | spin_lock(&rgd->rd_rsspin); | 2135 | spin_lock(&rgd->rd_rsspin); |
2080 | for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { | 2136 | for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { |
2081 | trs = rb_entry(n, struct gfs2_blkreserv, rs_node); | 2137 | trs = rb_entry(n, struct gfs2_blkreserv, rs_node); |
2082 | dump_rs(seq, trs); | 2138 | dump_rs(seq, trs); |
2083 | } | 2139 | } |
2084 | spin_unlock(&rgd->rd_rsspin); | 2140 | spin_unlock(&rgd->rd_rsspin); |
2085 | return 0; | ||
2086 | } | 2141 | } |
2087 | 2142 | ||
2088 | static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) | 2143 | static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) |
@@ -2184,18 +2239,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2184 | int error; | 2239 | int error; |
2185 | 2240 | ||
2186 | gfs2_set_alloc_start(&rbm, ip, dinode); | 2241 | gfs2_set_alloc_start(&rbm, ip, dinode); |
2187 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); | 2242 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL); |
2188 | 2243 | ||
2189 | if (error == -ENOSPC) { | 2244 | if (error == -ENOSPC) { |
2190 | gfs2_set_alloc_start(&rbm, ip, dinode); | 2245 | gfs2_set_alloc_start(&rbm, ip, dinode); |
2191 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); | 2246 | error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false, |
2247 | NULL); | ||
2192 | } | 2248 | } |
2193 | 2249 | ||
2194 | /* Since all blocks are reserved in advance, this shouldn't happen */ | 2250 | /* Since all blocks are reserved in advance, this shouldn't happen */ |
2195 | if (error) { | 2251 | if (error) { |
2196 | fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", | 2252 | fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n", |
2197 | (unsigned long long)ip->i_no_addr, error, *nblocks, | 2253 | (unsigned long long)ip->i_no_addr, error, *nblocks, |
2198 | test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); | 2254 | test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags), |
2255 | rbm.rgd->rd_extfail_pt); | ||
2199 | goto rgrp_error; | 2256 | goto rgrp_error; |
2200 | } | 2257 | } |
2201 | 2258 | ||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3a10d2ffbbe7..463ab2e95d1c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -68,7 +68,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, | |||
68 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); | 68 | extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
69 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 69 | extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
70 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 70 | extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
71 | extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); | 71 | extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); |
72 | extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | 72 | extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, |
73 | struct buffer_head *bh, | 73 | struct buffer_head *bh, |
74 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); | 74 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 35da5b19c0de..60f60f6181f3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -369,6 +369,33 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
369 | return 0; | 369 | return 0; |
370 | } | 370 | } |
371 | 371 | ||
372 | static int init_threads(struct gfs2_sbd *sdp) | ||
373 | { | ||
374 | struct task_struct *p; | ||
375 | int error = 0; | ||
376 | |||
377 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | ||
378 | if (IS_ERR(p)) { | ||
379 | error = PTR_ERR(p); | ||
380 | fs_err(sdp, "can't start logd thread: %d\n", error); | ||
381 | return error; | ||
382 | } | ||
383 | sdp->sd_logd_process = p; | ||
384 | |||
385 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | ||
386 | if (IS_ERR(p)) { | ||
387 | error = PTR_ERR(p); | ||
388 | fs_err(sdp, "can't start quotad thread: %d\n", error); | ||
389 | goto fail; | ||
390 | } | ||
391 | sdp->sd_quotad_process = p; | ||
392 | return 0; | ||
393 | |||
394 | fail: | ||
395 | kthread_stop(sdp->sd_logd_process); | ||
396 | return error; | ||
397 | } | ||
398 | |||
372 | /** | 399 | /** |
373 | * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one | 400 | * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one |
374 | * @sdp: the filesystem | 401 | * @sdp: the filesystem |
@@ -384,10 +411,14 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
384 | struct gfs2_log_header_host head; | 411 | struct gfs2_log_header_host head; |
385 | int error; | 412 | int error; |
386 | 413 | ||
387 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); | 414 | error = init_threads(sdp); |
388 | if (error) | 415 | if (error) |
389 | return error; | 416 | return error; |
390 | 417 | ||
418 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); | ||
419 | if (error) | ||
420 | goto fail_threads; | ||
421 | |||
391 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 422 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
392 | 423 | ||
393 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 424 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -417,7 +448,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
417 | fail: | 448 | fail: |
418 | t_gh.gh_flags |= GL_NOCACHE; | 449 | t_gh.gh_flags |= GL_NOCACHE; |
419 | gfs2_glock_dq_uninit(&t_gh); | 450 | gfs2_glock_dq_uninit(&t_gh); |
420 | 451 | fail_threads: | |
452 | kthread_stop(sdp->sd_quotad_process); | ||
453 | kthread_stop(sdp->sd_logd_process); | ||
421 | return error; | 454 | return error; |
422 | } | 455 | } |
423 | 456 | ||
@@ -800,6 +833,9 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | |||
800 | struct gfs2_holder t_gh; | 833 | struct gfs2_holder t_gh; |
801 | int error; | 834 | int error; |
802 | 835 | ||
836 | kthread_stop(sdp->sd_quotad_process); | ||
837 | kthread_stop(sdp->sd_logd_process); | ||
838 | |||
803 | flush_workqueue(gfs2_delete_workqueue); | 839 | flush_workqueue(gfs2_delete_workqueue); |
804 | gfs2_quota_sync(sdp->sd_vfs, 0); | 840 | gfs2_quota_sync(sdp->sd_vfs, 0); |
805 | gfs2_statfs_sync(sdp->sd_vfs, 0); | 841 | gfs2_statfs_sync(sdp->sd_vfs, 0); |
@@ -857,9 +893,6 @@ restart: | |||
857 | } | 893 | } |
858 | spin_unlock(&sdp->sd_jindex_spin); | 894 | spin_unlock(&sdp->sd_jindex_spin); |
859 | 895 | ||
860 | kthread_stop(sdp->sd_quotad_process); | ||
861 | kthread_stop(sdp->sd_logd_process); | ||
862 | |||
863 | if (!(sb->s_flags & MS_RDONLY)) { | 896 | if (!(sb->s_flags & MS_RDONLY)) { |
864 | error = gfs2_make_fs_ro(sdp); | 897 | error = gfs2_make_fs_ro(sdp); |
865 | if (error) | 898 | if (error) |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 2d04f9afafd7..06fe11e0abfa 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -573,7 +573,7 @@ int log_wait_commit(journal_t *journal, tid_t tid) | |||
573 | #ifdef CONFIG_JBD_DEBUG | 573 | #ifdef CONFIG_JBD_DEBUG |
574 | spin_lock(&journal->j_state_lock); | 574 | spin_lock(&journal->j_state_lock); |
575 | if (!tid_geq(journal->j_commit_request, tid)) { | 575 | if (!tid_geq(journal->j_commit_request, tid)) { |
576 | printk(KERN_EMERG | 576 | printk(KERN_ERR |
577 | "%s: error: j_commit_request=%d, tid=%d\n", | 577 | "%s: error: j_commit_request=%d, tid=%d\n", |
578 | __func__, journal->j_commit_request, tid); | 578 | __func__, journal->j_commit_request, tid); |
579 | } | 579 | } |
@@ -604,10 +604,8 @@ int log_wait_commit(journal_t *journal, tid_t tid) | |||
604 | out_unlock: | 604 | out_unlock: |
605 | spin_unlock(&journal->j_state_lock); | 605 | spin_unlock(&journal->j_state_lock); |
606 | 606 | ||
607 | if (unlikely(is_journal_aborted(journal))) { | 607 | if (unlikely(is_journal_aborted(journal))) |
608 | printk(KERN_EMERG "journal commit I/O error\n"); | ||
609 | err = -EIO; | 608 | err = -EIO; |
610 | } | ||
611 | return err; | 609 | return err; |
612 | } | 610 | } |
613 | 611 | ||
@@ -2136,7 +2134,7 @@ static void __exit journal_exit(void) | |||
2136 | #ifdef CONFIG_JBD_DEBUG | 2134 | #ifdef CONFIG_JBD_DEBUG |
2137 | int n = atomic_read(&nr_journal_heads); | 2135 | int n = atomic_read(&nr_journal_heads); |
2138 | if (n) | 2136 | if (n) |
2139 | printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); | 2137 | printk(KERN_ERR "JBD: leaked %d journal_heads!\n", n); |
2140 | #endif | 2138 | #endif |
2141 | jbd_remove_debugfs_entry(); | 2139 | jbd_remove_debugfs_entry(); |
2142 | journal_destroy_caches(); | 2140 | journal_destroy_caches(); |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index aa603e017d22..1695ba8334a2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -675,7 +675,7 @@ repeat: | |||
675 | jbd_alloc(jh2bh(jh)->b_size, | 675 | jbd_alloc(jh2bh(jh)->b_size, |
676 | GFP_NOFS); | 676 | GFP_NOFS); |
677 | if (!frozen_buffer) { | 677 | if (!frozen_buffer) { |
678 | printk(KERN_EMERG | 678 | printk(KERN_ERR |
679 | "%s: OOM for frozen_buffer\n", | 679 | "%s: OOM for frozen_buffer\n", |
680 | __func__); | 680 | __func__); |
681 | JBUFFER_TRACE(jh, "oom!"); | 681 | JBUFFER_TRACE(jh, "oom!"); |
@@ -898,7 +898,7 @@ repeat: | |||
898 | if (!jh->b_committed_data) { | 898 | if (!jh->b_committed_data) { |
899 | committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); | 899 | committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
900 | if (!committed_data) { | 900 | if (!committed_data) { |
901 | printk(KERN_EMERG "%s: No memory for committed data\n", | 901 | printk(KERN_ERR "%s: No memory for committed data\n", |
902 | __func__); | 902 | __func__); |
903 | err = -ENOMEM; | 903 | err = -ENOMEM; |
904 | goto out; | 904 | goto out; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 52032647dd4a..5fa344afb49a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
702 | read_lock(&journal->j_state_lock); | 702 | read_lock(&journal->j_state_lock); |
703 | #ifdef CONFIG_JBD2_DEBUG | 703 | #ifdef CONFIG_JBD2_DEBUG |
704 | if (!tid_geq(journal->j_commit_request, tid)) { | 704 | if (!tid_geq(journal->j_commit_request, tid)) { |
705 | printk(KERN_EMERG | 705 | printk(KERN_ERR |
706 | "%s: error: j_commit_request=%d, tid=%d\n", | 706 | "%s: error: j_commit_request=%d, tid=%d\n", |
707 | __func__, journal->j_commit_request, tid); | 707 | __func__, journal->j_commit_request, tid); |
708 | } | 708 | } |
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
718 | } | 718 | } |
719 | read_unlock(&journal->j_state_lock); | 719 | read_unlock(&journal->j_state_lock); |
720 | 720 | ||
721 | if (unlikely(is_journal_aborted(journal))) { | 721 | if (unlikely(is_journal_aborted(journal))) |
722 | printk(KERN_EMERG "journal commit I/O error\n"); | ||
723 | err = -EIO; | 722 | err = -EIO; |
724 | } | ||
725 | return err; | 723 | return err; |
726 | } | 724 | } |
727 | 725 | ||
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal) | |||
1527 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && | 1525 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && |
1528 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1526 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { |
1529 | /* Can't have checksum v1 and v2 on at the same time! */ | 1527 | /* Can't have checksum v1 and v2 on at the same time! */ |
1530 | printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " | 1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " |
1531 | "at the same time!\n"); | 1529 | "at the same time!\n"); |
1532 | goto out; | 1530 | goto out; |
1533 | } | 1531 | } |
1534 | 1532 | ||
1535 | if (!jbd2_verify_csum_type(journal, sb)) { | 1533 | if (!jbd2_verify_csum_type(journal, sb)) { |
1536 | printk(KERN_ERR "JBD: Unknown checksum type\n"); | 1534 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); |
1537 | goto out; | 1535 | goto out; |
1538 | } | 1536 | } |
1539 | 1537 | ||
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1541 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1539 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { |
1542 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | 1540 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); |
1543 | if (IS_ERR(journal->j_chksum_driver)) { | 1541 | if (IS_ERR(journal->j_chksum_driver)) { |
1544 | printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); | 1542 | printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); |
1545 | err = PTR_ERR(journal->j_chksum_driver); | 1543 | err = PTR_ERR(journal->j_chksum_driver); |
1546 | journal->j_chksum_driver = NULL; | 1544 | journal->j_chksum_driver = NULL; |
1547 | goto out; | 1545 | goto out; |
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1550 | 1548 | ||
1551 | /* Check superblock checksum */ | 1549 | /* Check superblock checksum */ |
1552 | if (!jbd2_superblock_csum_verify(journal, sb)) { | 1550 | if (!jbd2_superblock_csum_verify(journal, sb)) { |
1553 | printk(KERN_ERR "JBD: journal checksum error\n"); | 1551 | printk(KERN_ERR "JBD2: journal checksum error\n"); |
1554 | goto out; | 1552 | goto out; |
1555 | } | 1553 | } |
1556 | 1554 | ||
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1836 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", | 1834 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", |
1837 | 0, 0); | 1835 | 0, 0); |
1838 | if (IS_ERR(journal->j_chksum_driver)) { | 1836 | if (IS_ERR(journal->j_chksum_driver)) { |
1839 | printk(KERN_ERR "JBD: Cannot load crc32c " | 1837 | printk(KERN_ERR "JBD2: Cannot load crc32c " |
1840 | "driver.\n"); | 1838 | "driver.\n"); |
1841 | journal->j_chksum_driver = NULL; | 1839 | journal->j_chksum_driver = NULL; |
1842 | return 0; | 1840 | return 0; |
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void) | |||
2645 | #ifdef CONFIG_JBD2_DEBUG | 2643 | #ifdef CONFIG_JBD2_DEBUG |
2646 | int n = atomic_read(&nr_journal_heads); | 2644 | int n = atomic_read(&nr_journal_heads); |
2647 | if (n) | 2645 | if (n) |
2648 | printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); | 2646 | printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); |
2649 | #endif | 2647 | #endif |
2650 | jbd2_remove_jbd_stats_proc_entry(); | 2648 | jbd2_remove_jbd_stats_proc_entry(); |
2651 | jbd2_journal_destroy_caches(); | 2649 | jbd2_journal_destroy_caches(); |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3929c50428b1..3b6bb19d60b1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal, | |||
594 | be32_to_cpu(tmp->h_sequence))) { | 594 | be32_to_cpu(tmp->h_sequence))) { |
595 | brelse(obh); | 595 | brelse(obh); |
596 | success = -EIO; | 596 | success = -EIO; |
597 | printk(KERN_ERR "JBD: Invalid " | 597 | printk(KERN_ERR "JBD2: Invalid " |
598 | "checksum recovering " | 598 | "checksum recovering " |
599 | "block %llu in log\n", | 599 | "block %llu in log\n", |
600 | blocknr); | 600 | blocknr); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7aa9a32573bb..8360674c85bc 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -932,7 +932,7 @@ repeat: | |||
932 | jbd2_alloc(jh2bh(jh)->b_size, | 932 | jbd2_alloc(jh2bh(jh)->b_size, |
933 | GFP_NOFS); | 933 | GFP_NOFS); |
934 | if (!frozen_buffer) { | 934 | if (!frozen_buffer) { |
935 | printk(KERN_EMERG | 935 | printk(KERN_ERR |
936 | "%s: OOM for frozen_buffer\n", | 936 | "%s: OOM for frozen_buffer\n", |
937 | __func__); | 937 | __func__); |
938 | JBUFFER_TRACE(jh, "oom!"); | 938 | JBUFFER_TRACE(jh, "oom!"); |
@@ -1166,7 +1166,7 @@ repeat: | |||
1166 | if (!jh->b_committed_data) { | 1166 | if (!jh->b_committed_data) { |
1167 | committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); | 1167 | committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
1168 | if (!committed_data) { | 1168 | if (!committed_data) { |
1169 | printk(KERN_EMERG "%s: No memory for committed data\n", | 1169 | printk(KERN_ERR "%s: No memory for committed data\n", |
1170 | __func__); | 1170 | __func__); |
1171 | err = -ENOMEM; | 1171 | err = -ENOMEM; |
1172 | goto out; | 1172 | goto out; |
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1290 | * once a transaction -bzzz | 1290 | * once a transaction -bzzz |
1291 | */ | 1291 | */ |
1292 | jh->b_modified = 1; | 1292 | jh->b_modified = 1; |
1293 | J_ASSERT_JH(jh, handle->h_buffer_credits > 0); | 1293 | if (handle->h_buffer_credits <= 0) { |
1294 | ret = -ENOSPC; | ||
1295 | goto out_unlock_bh; | ||
1296 | } | ||
1294 | handle->h_buffer_credits--; | 1297 | handle->h_buffer_credits--; |
1295 | } | 1298 | } |
1296 | 1299 | ||
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1305 | JBUFFER_TRACE(jh, "fastpath"); | 1308 | JBUFFER_TRACE(jh, "fastpath"); |
1306 | if (unlikely(jh->b_transaction != | 1309 | if (unlikely(jh->b_transaction != |
1307 | journal->j_running_transaction)) { | 1310 | journal->j_running_transaction)) { |
1308 | printk(KERN_EMERG "JBD: %s: " | 1311 | printk(KERN_ERR "JBD2: %s: " |
1309 | "jh->b_transaction (%llu, %p, %u) != " | 1312 | "jh->b_transaction (%llu, %p, %u) != " |
1310 | "journal->j_running_transaction (%p, %u)", | 1313 | "journal->j_running_transaction (%p, %u)", |
1311 | journal->j_devname, | 1314 | journal->j_devname, |
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1332 | JBUFFER_TRACE(jh, "already on other transaction"); | 1335 | JBUFFER_TRACE(jh, "already on other transaction"); |
1333 | if (unlikely(jh->b_transaction != | 1336 | if (unlikely(jh->b_transaction != |
1334 | journal->j_committing_transaction)) { | 1337 | journal->j_committing_transaction)) { |
1335 | printk(KERN_EMERG "JBD: %s: " | 1338 | printk(KERN_ERR "JBD2: %s: " |
1336 | "jh->b_transaction (%llu, %p, %u) != " | 1339 | "jh->b_transaction (%llu, %p, %u) != " |
1337 | "journal->j_committing_transaction (%p, %u)", | 1340 | "journal->j_committing_transaction (%p, %u)", |
1338 | journal->j_devname, | 1341 | journal->j_devname, |
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1345 | ret = -EINVAL; | 1348 | ret = -EINVAL; |
1346 | } | 1349 | } |
1347 | if (unlikely(jh->b_next_transaction != transaction)) { | 1350 | if (unlikely(jh->b_next_transaction != transaction)) { |
1348 | printk(KERN_EMERG "JBD: %s: " | 1351 | printk(KERN_ERR "JBD2: %s: " |
1349 | "jh->b_next_transaction (%llu, %p, %u) != " | 1352 | "jh->b_next_transaction (%llu, %p, %u) != " |
1350 | "transaction (%p, %u)", | 1353 | "transaction (%p, %u)", |
1351 | journal->j_devname, | 1354 | journal->j_devname, |
@@ -1373,7 +1376,6 @@ out_unlock_bh: | |||
1373 | jbd2_journal_put_journal_head(jh); | 1376 | jbd2_journal_put_journal_head(jh); |
1374 | out: | 1377 | out: |
1375 | JBUFFER_TRACE(jh, "exit"); | 1378 | JBUFFER_TRACE(jh, "exit"); |
1376 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ | ||
1377 | return ret; | 1379 | return ret; |
1378 | } | 1380 | } |
1379 | 1381 | ||
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile new file mode 100644 index 000000000000..674337c76673 --- /dev/null +++ b/fs/kernfs/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the kernfs pseudo filesystem | ||
3 | # | ||
4 | |||
5 | obj-y := mount.o inode.o dir.o file.o symlink.o | ||
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c new file mode 100644 index 000000000000..5104cf5d25c5 --- /dev/null +++ b/fs/kernfs/dir.c | |||
@@ -0,0 +1,1073 @@ | |||
1 | /* | ||
2 | * fs/kernfs/dir.c - kernfs directory implementation | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/namei.h> | ||
13 | #include <linux/idr.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/security.h> | ||
16 | #include <linux/hash.h> | ||
17 | |||
18 | #include "kernfs-internal.h" | ||
19 | |||
20 | DEFINE_MUTEX(kernfs_mutex); | ||
21 | |||
22 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) | ||
23 | |||
24 | /** | ||
25 | * kernfs_name_hash | ||
26 | * @name: Null terminated string to hash | ||
27 | * @ns: Namespace tag to hash | ||
28 | * | ||
29 | * Returns 31 bit hash of ns + name (so it fits in an off_t ) | ||
30 | */ | ||
31 | static unsigned int kernfs_name_hash(const char *name, const void *ns) | ||
32 | { | ||
33 | unsigned long hash = init_name_hash(); | ||
34 | unsigned int len = strlen(name); | ||
35 | while (len--) | ||
36 | hash = partial_name_hash(*name++, hash); | ||
37 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); | ||
38 | hash &= 0x7fffffffU; | ||
39 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ | ||
40 | if (hash < 1) | ||
41 | hash += 2; | ||
42 | if (hash >= INT_MAX) | ||
43 | hash = INT_MAX - 1; | ||
44 | return hash; | ||
45 | } | ||
46 | |||
47 | static int kernfs_name_compare(unsigned int hash, const char *name, | ||
48 | const void *ns, const struct kernfs_node *kn) | ||
49 | { | ||
50 | if (hash != kn->hash) | ||
51 | return hash - kn->hash; | ||
52 | if (ns != kn->ns) | ||
53 | return ns - kn->ns; | ||
54 | return strcmp(name, kn->name); | ||
55 | } | ||
56 | |||
57 | static int kernfs_sd_compare(const struct kernfs_node *left, | ||
58 | const struct kernfs_node *right) | ||
59 | { | ||
60 | return kernfs_name_compare(left->hash, left->name, left->ns, right); | ||
61 | } | ||
62 | |||
63 | /** | ||
64 | * kernfs_link_sibling - link kernfs_node into sibling rbtree | ||
65 | * @kn: kernfs_node of interest | ||
66 | * | ||
67 | * Link @kn into its sibling rbtree which starts from | ||
68 | * @kn->parent->dir.children. | ||
69 | * | ||
70 | * Locking: | ||
71 | * mutex_lock(kernfs_mutex) | ||
72 | * | ||
73 | * RETURNS: | ||
74 | * 0 on susccess -EEXIST on failure. | ||
75 | */ | ||
76 | static int kernfs_link_sibling(struct kernfs_node *kn) | ||
77 | { | ||
78 | struct rb_node **node = &kn->parent->dir.children.rb_node; | ||
79 | struct rb_node *parent = NULL; | ||
80 | |||
81 | if (kernfs_type(kn) == KERNFS_DIR) | ||
82 | kn->parent->dir.subdirs++; | ||
83 | |||
84 | while (*node) { | ||
85 | struct kernfs_node *pos; | ||
86 | int result; | ||
87 | |||
88 | pos = rb_to_kn(*node); | ||
89 | parent = *node; | ||
90 | result = kernfs_sd_compare(kn, pos); | ||
91 | if (result < 0) | ||
92 | node = &pos->rb.rb_left; | ||
93 | else if (result > 0) | ||
94 | node = &pos->rb.rb_right; | ||
95 | else | ||
96 | return -EEXIST; | ||
97 | } | ||
98 | /* add new node and rebalance the tree */ | ||
99 | rb_link_node(&kn->rb, parent, node); | ||
100 | rb_insert_color(&kn->rb, &kn->parent->dir.children); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | /** | ||
105 | * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree | ||
106 | * @kn: kernfs_node of interest | ||
107 | * | ||
108 | * Unlink @kn from its sibling rbtree which starts from | ||
109 | * kn->parent->dir.children. | ||
110 | * | ||
111 | * Locking: | ||
112 | * mutex_lock(kernfs_mutex) | ||
113 | */ | ||
114 | static void kernfs_unlink_sibling(struct kernfs_node *kn) | ||
115 | { | ||
116 | if (kernfs_type(kn) == KERNFS_DIR) | ||
117 | kn->parent->dir.subdirs--; | ||
118 | |||
119 | rb_erase(&kn->rb, &kn->parent->dir.children); | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * kernfs_get_active - get an active reference to kernfs_node | ||
124 | * @kn: kernfs_node to get an active reference to | ||
125 | * | ||
126 | * Get an active reference of @kn. This function is noop if @kn | ||
127 | * is NULL. | ||
128 | * | ||
129 | * RETURNS: | ||
130 | * Pointer to @kn on success, NULL on failure. | ||
131 | */ | ||
132 | struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) | ||
133 | { | ||
134 | if (unlikely(!kn)) | ||
135 | return NULL; | ||
136 | |||
137 | if (!atomic_inc_unless_negative(&kn->active)) | ||
138 | return NULL; | ||
139 | |||
140 | if (kn->flags & KERNFS_LOCKDEP) | ||
141 | rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); | ||
142 | return kn; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * kernfs_put_active - put an active reference to kernfs_node | ||
147 | * @kn: kernfs_node to put an active reference to | ||
148 | * | ||
149 | * Put an active reference to @kn. This function is noop if @kn | ||
150 | * is NULL. | ||
151 | */ | ||
152 | void kernfs_put_active(struct kernfs_node *kn) | ||
153 | { | ||
154 | int v; | ||
155 | |||
156 | if (unlikely(!kn)) | ||
157 | return; | ||
158 | |||
159 | if (kn->flags & KERNFS_LOCKDEP) | ||
160 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | ||
161 | v = atomic_dec_return(&kn->active); | ||
162 | if (likely(v != KN_DEACTIVATED_BIAS)) | ||
163 | return; | ||
164 | |||
165 | /* | ||
166 | * atomic_dec_return() is a mb(), we'll always see the updated | ||
167 | * kn->u.completion. | ||
168 | */ | ||
169 | complete(kn->u.completion); | ||
170 | } | ||
171 | |||
172 | /** | ||
173 | * kernfs_deactivate - deactivate kernfs_node | ||
174 | * @kn: kernfs_node to deactivate | ||
175 | * | ||
176 | * Deny new active references and drain existing ones. | ||
177 | */ | ||
178 | static void kernfs_deactivate(struct kernfs_node *kn) | ||
179 | { | ||
180 | DECLARE_COMPLETION_ONSTACK(wait); | ||
181 | int v; | ||
182 | |||
183 | BUG_ON(!(kn->flags & KERNFS_REMOVED)); | ||
184 | |||
185 | if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) | ||
186 | return; | ||
187 | |||
188 | kn->u.completion = (void *)&wait; | ||
189 | |||
190 | rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); | ||
191 | /* atomic_add_return() is a mb(), put_active() will always see | ||
192 | * the updated kn->u.completion. | ||
193 | */ | ||
194 | v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); | ||
195 | |||
196 | if (v != KN_DEACTIVATED_BIAS) { | ||
197 | lock_contended(&kn->dep_map, _RET_IP_); | ||
198 | wait_for_completion(&wait); | ||
199 | } | ||
200 | |||
201 | lock_acquired(&kn->dep_map, _RET_IP_); | ||
202 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * kernfs_get - get a reference count on a kernfs_node | ||
207 | * @kn: the target kernfs_node | ||
208 | */ | ||
209 | void kernfs_get(struct kernfs_node *kn) | ||
210 | { | ||
211 | if (kn) { | ||
212 | WARN_ON(!atomic_read(&kn->count)); | ||
213 | atomic_inc(&kn->count); | ||
214 | } | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(kernfs_get); | ||
217 | |||
218 | /** | ||
219 | * kernfs_put - put a reference count on a kernfs_node | ||
220 | * @kn: the target kernfs_node | ||
221 | * | ||
222 | * Put a reference count of @kn and destroy it if it reached zero. | ||
223 | */ | ||
224 | void kernfs_put(struct kernfs_node *kn) | ||
225 | { | ||
226 | struct kernfs_node *parent; | ||
227 | struct kernfs_root *root; | ||
228 | |||
229 | if (!kn || !atomic_dec_and_test(&kn->count)) | ||
230 | return; | ||
231 | root = kernfs_root(kn); | ||
232 | repeat: | ||
233 | /* Moving/renaming is always done while holding reference. | ||
234 | * kn->parent won't change beneath us. | ||
235 | */ | ||
236 | parent = kn->parent; | ||
237 | |||
238 | WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", | ||
239 | parent ? parent->name : "", kn->name); | ||
240 | |||
241 | if (kernfs_type(kn) == KERNFS_LINK) | ||
242 | kernfs_put(kn->symlink.target_kn); | ||
243 | if (!(kn->flags & KERNFS_STATIC_NAME)) | ||
244 | kfree(kn->name); | ||
245 | if (kn->iattr) { | ||
246 | if (kn->iattr->ia_secdata) | ||
247 | security_release_secctx(kn->iattr->ia_secdata, | ||
248 | kn->iattr->ia_secdata_len); | ||
249 | simple_xattrs_free(&kn->iattr->xattrs); | ||
250 | } | ||
251 | kfree(kn->iattr); | ||
252 | ida_simple_remove(&root->ino_ida, kn->ino); | ||
253 | kmem_cache_free(kernfs_node_cache, kn); | ||
254 | |||
255 | kn = parent; | ||
256 | if (kn) { | ||
257 | if (atomic_dec_and_test(&kn->count)) | ||
258 | goto repeat; | ||
259 | } else { | ||
260 | /* just released the root kn, free @root too */ | ||
261 | ida_destroy(&root->ino_ida); | ||
262 | kfree(root); | ||
263 | } | ||
264 | } | ||
265 | EXPORT_SYMBOL_GPL(kernfs_put); | ||
266 | |||
267 | static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) | ||
268 | { | ||
269 | struct kernfs_node *kn; | ||
270 | |||
271 | if (flags & LOOKUP_RCU) | ||
272 | return -ECHILD; | ||
273 | |||
274 | /* Always perform fresh lookup for negatives */ | ||
275 | if (!dentry->d_inode) | ||
276 | goto out_bad_unlocked; | ||
277 | |||
278 | kn = dentry->d_fsdata; | ||
279 | mutex_lock(&kernfs_mutex); | ||
280 | |||
281 | /* The kernfs node has been deleted */ | ||
282 | if (kn->flags & KERNFS_REMOVED) | ||
283 | goto out_bad; | ||
284 | |||
285 | /* The kernfs node has been moved? */ | ||
286 | if (dentry->d_parent->d_fsdata != kn->parent) | ||
287 | goto out_bad; | ||
288 | |||
289 | /* The kernfs node has been renamed */ | ||
290 | if (strcmp(dentry->d_name.name, kn->name) != 0) | ||
291 | goto out_bad; | ||
292 | |||
293 | /* The kernfs node has been moved to a different namespace */ | ||
294 | if (kn->parent && kernfs_ns_enabled(kn->parent) && | ||
295 | kernfs_info(dentry->d_sb)->ns != kn->ns) | ||
296 | goto out_bad; | ||
297 | |||
298 | mutex_unlock(&kernfs_mutex); | ||
299 | out_valid: | ||
300 | return 1; | ||
301 | out_bad: | ||
302 | mutex_unlock(&kernfs_mutex); | ||
303 | out_bad_unlocked: | ||
304 | /* | ||
305 | * @dentry doesn't match the underlying kernfs node, drop the | ||
306 | * dentry and force lookup. If we have submounts we must allow the | ||
307 | * vfs caches to lie about the state of the filesystem to prevent | ||
308 | * leaks and other nasty things, so use check_submounts_and_drop() | ||
309 | * instead of d_drop(). | ||
310 | */ | ||
311 | if (check_submounts_and_drop(dentry) != 0) | ||
312 | goto out_valid; | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | static void kernfs_dop_release(struct dentry *dentry) | ||
318 | { | ||
319 | kernfs_put(dentry->d_fsdata); | ||
320 | } | ||
321 | |||
322 | const struct dentry_operations kernfs_dops = { | ||
323 | .d_revalidate = kernfs_dop_revalidate, | ||
324 | .d_release = kernfs_dop_release, | ||
325 | }; | ||
326 | |||
327 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | ||
328 | const char *name, umode_t mode, | ||
329 | unsigned flags) | ||
330 | { | ||
331 | char *dup_name = NULL; | ||
332 | struct kernfs_node *kn; | ||
333 | int ret; | ||
334 | |||
335 | if (!(flags & KERNFS_STATIC_NAME)) { | ||
336 | name = dup_name = kstrdup(name, GFP_KERNEL); | ||
337 | if (!name) | ||
338 | return NULL; | ||
339 | } | ||
340 | |||
341 | kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); | ||
342 | if (!kn) | ||
343 | goto err_out1; | ||
344 | |||
345 | ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); | ||
346 | if (ret < 0) | ||
347 | goto err_out2; | ||
348 | kn->ino = ret; | ||
349 | |||
350 | atomic_set(&kn->count, 1); | ||
351 | atomic_set(&kn->active, 0); | ||
352 | |||
353 | kn->name = name; | ||
354 | kn->mode = mode; | ||
355 | kn->flags = flags | KERNFS_REMOVED; | ||
356 | |||
357 | return kn; | ||
358 | |||
359 | err_out2: | ||
360 | kmem_cache_free(kernfs_node_cache, kn); | ||
361 | err_out1: | ||
362 | kfree(dup_name); | ||
363 | return NULL; | ||
364 | } | ||
365 | |||
366 | struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, | ||
367 | const char *name, umode_t mode, | ||
368 | unsigned flags) | ||
369 | { | ||
370 | struct kernfs_node *kn; | ||
371 | |||
372 | kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags); | ||
373 | if (kn) { | ||
374 | kernfs_get(parent); | ||
375 | kn->parent = parent; | ||
376 | } | ||
377 | return kn; | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * kernfs_addrm_start - prepare for kernfs_node add/remove | ||
382 | * @acxt: pointer to kernfs_addrm_cxt to be used | ||
383 | * | ||
384 | * This function is called when the caller is about to add or remove | ||
385 | * kernfs_node. This function acquires kernfs_mutex. @acxt is used | ||
386 | * to keep and pass context to other addrm functions. | ||
387 | * | ||
388 | * LOCKING: | ||
389 | * Kernel thread context (may sleep). kernfs_mutex is locked on | ||
390 | * return. | ||
391 | */ | ||
392 | void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt) | ||
393 | __acquires(kernfs_mutex) | ||
394 | { | ||
395 | memset(acxt, 0, sizeof(*acxt)); | ||
396 | |||
397 | mutex_lock(&kernfs_mutex); | ||
398 | } | ||
399 | |||
400 | /** | ||
401 | * kernfs_add_one - add kernfs_node to parent without warning | ||
402 | * @acxt: addrm context to use | ||
403 | * @kn: kernfs_node to be added | ||
404 | * | ||
405 | * The caller must already have initialized @kn->parent. This | ||
406 | * function increments nlink of the parent's inode if @kn is a | ||
407 | * directory and link into the children list of the parent. | ||
408 | * | ||
409 | * This function should be called between calls to | ||
410 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed | ||
411 | * the same @acxt as passed to kernfs_addrm_start(). | ||
412 | * | ||
413 | * LOCKING: | ||
414 | * Determined by kernfs_addrm_start(). | ||
415 | * | ||
416 | * RETURNS: | ||
417 | * 0 on success, -EEXIST if entry with the given name already | ||
418 | * exists. | ||
419 | */ | ||
420 | int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) | ||
421 | { | ||
422 | struct kernfs_node *parent = kn->parent; | ||
423 | bool has_ns = kernfs_ns_enabled(parent); | ||
424 | struct kernfs_iattrs *ps_iattr; | ||
425 | int ret; | ||
426 | |||
427 | if (has_ns != (bool)kn->ns) { | ||
428 | WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", | ||
429 | has_ns ? "required" : "invalid", parent->name, kn->name); | ||
430 | return -EINVAL; | ||
431 | } | ||
432 | |||
433 | if (kernfs_type(parent) != KERNFS_DIR) | ||
434 | return -EINVAL; | ||
435 | |||
436 | if (parent->flags & KERNFS_REMOVED) | ||
437 | return -ENOENT; | ||
438 | |||
439 | kn->hash = kernfs_name_hash(kn->name, kn->ns); | ||
440 | |||
441 | ret = kernfs_link_sibling(kn); | ||
442 | if (ret) | ||
443 | return ret; | ||
444 | |||
445 | /* Update timestamps on the parent */ | ||
446 | ps_iattr = parent->iattr; | ||
447 | if (ps_iattr) { | ||
448 | struct iattr *ps_iattrs = &ps_iattr->ia_iattr; | ||
449 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | ||
450 | } | ||
451 | |||
452 | /* Mark the entry added into directory tree */ | ||
453 | kn->flags &= ~KERNFS_REMOVED; | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | /** | ||
459 | * kernfs_remove_one - remove kernfs_node from parent | ||
460 | * @acxt: addrm context to use | ||
461 | * @kn: kernfs_node to be removed | ||
462 | * | ||
463 | * Mark @kn removed and drop nlink of parent inode if @kn is a | ||
464 | * directory. @kn is unlinked from the children list. | ||
465 | * | ||
466 | * This function should be called between calls to | ||
467 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be | ||
468 | * passed the same @acxt as passed to kernfs_addrm_start(). | ||
469 | * | ||
470 | * LOCKING: | ||
471 | * Determined by kernfs_addrm_start(). | ||
472 | */ | ||
473 | static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt, | ||
474 | struct kernfs_node *kn) | ||
475 | { | ||
476 | struct kernfs_iattrs *ps_iattr; | ||
477 | |||
478 | /* | ||
479 | * Removal can be called multiple times on the same node. Only the | ||
480 | * first invocation is effective and puts the base ref. | ||
481 | */ | ||
482 | if (kn->flags & KERNFS_REMOVED) | ||
483 | return; | ||
484 | |||
485 | if (kn->parent) { | ||
486 | kernfs_unlink_sibling(kn); | ||
487 | |||
488 | /* Update timestamps on the parent */ | ||
489 | ps_iattr = kn->parent->iattr; | ||
490 | if (ps_iattr) { | ||
491 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | ||
492 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | kn->flags |= KERNFS_REMOVED; | ||
497 | kn->u.removed_list = acxt->removed; | ||
498 | acxt->removed = kn; | ||
499 | } | ||
500 | |||
/**
 * kernfs_addrm_finish - finish up kernfs_node add/remove
 * @acxt: addrm context to finish up
 *
 * Finish up kernfs_node add/remove.  Resources acquired by
 * kernfs_addrm_start() are released and removed kernfs_nodes are
 * cleaned up.  Note that cleanup happens only after kernfs_mutex is
 * dropped, so the per-node teardown below runs unlocked.
 *
 * LOCKING:
 * kernfs_mutex is released.
 */
void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
	__releases(kernfs_mutex)
{
	/* release resources acquired by kernfs_addrm_start() */
	mutex_unlock(&kernfs_mutex);

	/* kill removed kernfs_nodes queued by kernfs_remove_one() */
	while (acxt->removed) {
		struct kernfs_node *kn = acxt->removed;

		/* unlink from the singly-linked removal list first */
		acxt->removed = kn->u.removed_list;

		kernfs_deactivate(kn);
		kernfs_unmap_bin_file(kn);
		kernfs_put(kn);		/* drops the base ref */
	}
}
529 | |||
530 | /** | ||
531 | * kernfs_find_ns - find kernfs_node with the given name | ||
532 | * @parent: kernfs_node to search under | ||
533 | * @name: name to look for | ||
534 | * @ns: the namespace tag to use | ||
535 | * | ||
536 | * Look for kernfs_node with name @name under @parent. Returns pointer to | ||
537 | * the found kernfs_node on success, %NULL on failure. | ||
538 | */ | ||
539 | static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, | ||
540 | const unsigned char *name, | ||
541 | const void *ns) | ||
542 | { | ||
543 | struct rb_node *node = parent->dir.children.rb_node; | ||
544 | bool has_ns = kernfs_ns_enabled(parent); | ||
545 | unsigned int hash; | ||
546 | |||
547 | lockdep_assert_held(&kernfs_mutex); | ||
548 | |||
549 | if (has_ns != (bool)ns) { | ||
550 | WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", | ||
551 | has_ns ? "required" : "invalid", parent->name, name); | ||
552 | return NULL; | ||
553 | } | ||
554 | |||
555 | hash = kernfs_name_hash(name, ns); | ||
556 | while (node) { | ||
557 | struct kernfs_node *kn; | ||
558 | int result; | ||
559 | |||
560 | kn = rb_to_kn(node); | ||
561 | result = kernfs_name_compare(hash, name, ns, kn); | ||
562 | if (result < 0) | ||
563 | node = node->rb_left; | ||
564 | else if (result > 0) | ||
565 | node = node->rb_right; | ||
566 | else | ||
567 | return kn; | ||
568 | } | ||
569 | return NULL; | ||
570 | } | ||
571 | |||
/**
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent and get a reference
 * if found.  This function may sleep and returns pointer to the found
 * kernfs_node on success, %NULL on failure.
 */
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
{
	struct kernfs_node *kn;

	mutex_lock(&kernfs_mutex);
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);		/* @kn may be NULL here; presumably
				 * kernfs_get() tolerates that — confirm */
	mutex_unlock(&kernfs_mutex);

	return kn;
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
595 | |||
596 | /** | ||
597 | * kernfs_create_root - create a new kernfs hierarchy | ||
598 | * @kdops: optional directory syscall operations for the hierarchy | ||
599 | * @priv: opaque data associated with the new directory | ||
600 | * | ||
601 | * Returns the root of the new hierarchy on success, ERR_PTR() value on | ||
602 | * failure. | ||
603 | */ | ||
604 | struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) | ||
605 | { | ||
606 | struct kernfs_root *root; | ||
607 | struct kernfs_node *kn; | ||
608 | |||
609 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
610 | if (!root) | ||
611 | return ERR_PTR(-ENOMEM); | ||
612 | |||
613 | ida_init(&root->ino_ida); | ||
614 | |||
615 | kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, | ||
616 | KERNFS_DIR); | ||
617 | if (!kn) { | ||
618 | ida_destroy(&root->ino_ida); | ||
619 | kfree(root); | ||
620 | return ERR_PTR(-ENOMEM); | ||
621 | } | ||
622 | |||
623 | kn->flags &= ~KERNFS_REMOVED; | ||
624 | kn->priv = priv; | ||
625 | kn->dir.root = root; | ||
626 | |||
627 | root->dir_ops = kdops; | ||
628 | root->kn = kn; | ||
629 | |||
630 | return root; | ||
631 | } | ||
632 | |||
/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	kernfs_remove(root->kn);	/* will also free @root */
}
644 | |||
645 | /** | ||
646 | * kernfs_create_dir_ns - create a directory | ||
647 | * @parent: parent in which to create a new directory | ||
648 | * @name: name of the new directory | ||
649 | * @mode: mode of the new directory | ||
650 | * @priv: opaque data associated with the new directory | ||
651 | * @ns: optional namespace tag of the directory | ||
652 | * | ||
653 | * Returns the created node on success, ERR_PTR() value on failure. | ||
654 | */ | ||
655 | struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, | ||
656 | const char *name, umode_t mode, | ||
657 | void *priv, const void *ns) | ||
658 | { | ||
659 | struct kernfs_addrm_cxt acxt; | ||
660 | struct kernfs_node *kn; | ||
661 | int rc; | ||
662 | |||
663 | /* allocate */ | ||
664 | kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR); | ||
665 | if (!kn) | ||
666 | return ERR_PTR(-ENOMEM); | ||
667 | |||
668 | kn->dir.root = parent->dir.root; | ||
669 | kn->ns = ns; | ||
670 | kn->priv = priv; | ||
671 | |||
672 | /* link in */ | ||
673 | kernfs_addrm_start(&acxt); | ||
674 | rc = kernfs_add_one(&acxt, kn); | ||
675 | kernfs_addrm_finish(&acxt); | ||
676 | |||
677 | if (!rc) | ||
678 | return kn; | ||
679 | |||
680 | kernfs_put(kn); | ||
681 | return ERR_PTR(rc); | ||
682 | } | ||
683 | |||
/* ->lookup: resolve @dentry under the kernfs node attached to its parent */
static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
{
	struct dentry *ret;
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
	struct inode *inode;
	const void *ns = NULL;

	mutex_lock(&kernfs_mutex);

	/* pick up the superblock's namespace tag if @parent is tagged */
	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dir->i_sb)->ns;

	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);

	/* no such entry */
	if (!kn) {
		ret = NULL;
		goto out_unlock;
	}
	kernfs_get(kn);		/* ref now owned by dentry->d_fsdata */
	dentry->d_fsdata = kn;

	/* attach dentry and inode */
	inode = kernfs_get_inode(dir->i_sb, kn);
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
out_unlock:
	mutex_unlock(&kernfs_mutex);
	return ret;
}
722 | |||
723 | static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, | ||
724 | umode_t mode) | ||
725 | { | ||
726 | struct kernfs_node *parent = dir->i_private; | ||
727 | struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; | ||
728 | |||
729 | if (!kdops || !kdops->mkdir) | ||
730 | return -EPERM; | ||
731 | |||
732 | return kdops->mkdir(parent, dentry->d_name.name, mode); | ||
733 | } | ||
734 | |||
735 | static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) | ||
736 | { | ||
737 | struct kernfs_node *kn = dentry->d_fsdata; | ||
738 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | ||
739 | |||
740 | if (!kdops || !kdops->rmdir) | ||
741 | return -EPERM; | ||
742 | |||
743 | return kdops->rmdir(kn); | ||
744 | } | ||
745 | |||
746 | static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
747 | struct inode *new_dir, struct dentry *new_dentry) | ||
748 | { | ||
749 | struct kernfs_node *kn = old_dentry->d_fsdata; | ||
750 | struct kernfs_node *new_parent = new_dir->i_private; | ||
751 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | ||
752 | |||
753 | if (!kdops || !kdops->rename) | ||
754 | return -EPERM; | ||
755 | |||
756 | return kdops->rename(kn, new_parent, new_dentry->d_name.name); | ||
757 | } | ||
758 | |||
/* inode operations for kernfs directories; mkdir/rmdir/rename only work
 * when the hierarchy supplied kernfs_dir_ops (see the kernfs_iop_* hooks) */
const struct inode_operations kernfs_dir_iops = {
	.lookup = kernfs_iop_lookup,
	.permission = kernfs_iop_permission,
	.setattr = kernfs_iop_setattr,
	.getattr = kernfs_iop_getattr,
	.setxattr = kernfs_iop_setxattr,
	.removexattr = kernfs_iop_removexattr,
	.getxattr = kernfs_iop_getxattr,
	.listxattr = kernfs_iop_listxattr,

	.mkdir = kernfs_iop_mkdir,
	.rmdir = kernfs_iop_rmdir,
	.rename = kernfs_iop_rename,
};
773 | |||
774 | static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) | ||
775 | { | ||
776 | struct kernfs_node *last; | ||
777 | |||
778 | while (true) { | ||
779 | struct rb_node *rbn; | ||
780 | |||
781 | last = pos; | ||
782 | |||
783 | if (kernfs_type(pos) != KERNFS_DIR) | ||
784 | break; | ||
785 | |||
786 | rbn = rb_first(&pos->dir.children); | ||
787 | if (!rbn) | ||
788 | break; | ||
789 | |||
790 | pos = rb_to_kn(rbn); | ||
791 | } | ||
792 | |||
793 | return last; | ||
794 | } | ||
795 | |||
/**
 * kernfs_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: kernfs_node whose descendants to walk
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
{
	struct rb_node *rbn;

	lockdep_assert_held(&kernfs_mutex);

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
		return kernfs_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	rbn = rb_next(&pos->rb);
	if (rbn)
		return kernfs_leftmost_descendant(rb_to_kn(rbn));

	/* no sibling left, visit parent */
	return pos->parent;
}
828 | |||
/* remove @kn and all its descendants; caller supplies the addrm context */
static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
			    struct kernfs_node *kn)
{
	struct kernfs_node *pos, *next;

	if (!kn)
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/*
	 * Post-order walk so children are removed before their parent.
	 * The next position is computed *before* kernfs_remove_one()
	 * unlinks @pos from its sibling rbtree — the walk depends on the
	 * rbtree links still being intact.
	 */
	next = NULL;
	do {
		pos = next;
		next = kernfs_next_descendant_post(pos, kn);
		if (pos)
			kernfs_remove_one(acxt, pos);
	} while (next);
}
847 | |||
/**
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
 *
 * Remove @kn along with all its subdirectories and files.  The removed
 * nodes are collected in an addrm context and torn down by
 * kernfs_addrm_finish() after kernfs_mutex is dropped.
 */
void kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_addrm_cxt acxt;

	kernfs_addrm_start(&acxt);
	__kernfs_remove(&acxt, kn);
	kernfs_addrm_finish(&acxt);
}
862 | |||
863 | /** | ||
864 | * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it | ||
865 | * @parent: parent of the target | ||
866 | * @name: name of the kernfs_node to remove | ||
867 | * @ns: namespace tag of the kernfs_node to remove | ||
868 | * | ||
869 | * Look for the kernfs_node with @name and @ns under @parent and remove it. | ||
870 | * Returns 0 on success, -ENOENT if such entry doesn't exist. | ||
871 | */ | ||
872 | int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | ||
873 | const void *ns) | ||
874 | { | ||
875 | struct kernfs_addrm_cxt acxt; | ||
876 | struct kernfs_node *kn; | ||
877 | |||
878 | if (!parent) { | ||
879 | WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", | ||
880 | name); | ||
881 | return -ENOENT; | ||
882 | } | ||
883 | |||
884 | kernfs_addrm_start(&acxt); | ||
885 | |||
886 | kn = kernfs_find_ns(parent, name, ns); | ||
887 | if (kn) | ||
888 | __kernfs_remove(&acxt, kn); | ||
889 | |||
890 | kernfs_addrm_finish(&acxt); | ||
891 | |||
892 | if (kn) | ||
893 | return 0; | ||
894 | else | ||
895 | return -ENOENT; | ||
896 | } | ||
897 | |||
/**
 * kernfs_rename_ns - move and rename a kernfs_node
 * @kn: target node
 * @new_parent: new parent to put @kn under
 * @new_name: new name
 * @new_ns: new namespace tag
 *
 * Returns 0 on success (including no-op renames), -ENOENT if either
 * node is already removed, -EEXIST if @new_name already exists under
 * @new_parent, or -ENOMEM if the new name can't be duplicated.
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
		     const char *new_name, const void *new_ns)
{
	int error;

	mutex_lock(&kernfs_mutex);

	error = -ENOENT;
	if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
		goto out;

	error = 0;
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

	/* rename kernfs_node */
	if (strcmp(kn->name, new_name) != 0) {
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;

		/* static names are flagged and never freed; others were
		 * heap-allocated and must be */
		if (kn->flags & KERNFS_STATIC_NAME)
			kn->flags &= ~KERNFS_STATIC_NAME;
		else
			kfree(kn->name);

		kn->name = new_name;
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
	kernfs_unlink_sibling(kn);
	kernfs_get(new_parent);		/* new parent's ref ... */
	kernfs_put(kn->parent);		/* ... replaces the old parent's */
	kn->ns = new_ns;
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
	kn->parent = new_parent;
	kernfs_link_sibling(kn);

	error = 0;
out:
	mutex_unlock(&kernfs_mutex);
	return error;
}
956 | |||
957 | /* Relationship between s_mode and the DT_xxx types */ | ||
958 | static inline unsigned char dt_type(struct kernfs_node *kn) | ||
959 | { | ||
960 | return (kn->mode >> 12) & 15; | ||
961 | } | ||
962 | |||
963 | static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) | ||
964 | { | ||
965 | kernfs_put(filp->private_data); | ||
966 | return 0; | ||
967 | } | ||
968 | |||
/*
 * Find the readdir position matching @hash under @parent.
 *
 * @pos is the cursor pinned by the previous readdir iteration (or NULL).
 * It is reusable only if it hasn't been removed, still belongs to
 * @parent and still matches the f_pos-encoded @hash; its pinning ref is
 * dropped here either way.  If unusable, re-find the position by
 * descending the children rbtree by hash — positions 0 and 1 are
 * consumed by dir_emit_dots() and INT_MAX marks EOF (both set in
 * kernfs_fop_readdir()), hence the range check.  Finally skip entries
 * tagged with a namespace other than @ns.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	if (pos) {
		int valid = !(pos->flags & KERNFS_REMOVED) &&
			pos->parent == parent && hash == pos->hash;
		kernfs_put(pos);
		if (!valid)
			pos = NULL;
	}
	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		struct rb_node *node = parent->dir.children.rb_node;
		while (node) {
			pos = rb_to_kn(node);

			if (hash < pos->hash)
				node = node->rb_left;
			else if (hash > pos->hash)
				node = node->rb_right;
			else
				break;
		}
	}
	/* Skip over entries in the wrong namespace */
	while (pos && pos->ns != ns) {
		struct rb_node *node = rb_next(&pos->rb);
		if (!node)
			pos = NULL;
		else
			pos = rb_to_kn(node);
	}
	return pos;
}
1002 | |||
1003 | static struct kernfs_node *kernfs_dir_next_pos(const void *ns, | ||
1004 | struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) | ||
1005 | { | ||
1006 | pos = kernfs_dir_pos(ns, parent, ino, pos); | ||
1007 | if (pos) | ||
1008 | do { | ||
1009 | struct rb_node *node = rb_next(&pos->rb); | ||
1010 | if (!node) | ||
1011 | pos = NULL; | ||
1012 | else | ||
1013 | pos = rb_to_kn(node); | ||
1014 | } while (pos && pos->ns != ns); | ||
1015 | return pos; | ||
1016 | } | ||
1017 | |||
/* ->iterate: emit directory entries, restartable across lock drops */
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
	const void *ns = NULL;

	/* "." and ".." occupy ctx->pos 0 and 1 */
	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&kernfs_mutex);

	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->ino;

		/* use the name hash as the f_pos cookie and pin @pos in
		 * ->private_data so it can be revalidated by
		 * kernfs_dir_pos() after kernfs_mutex is dropped below */
		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		/* dir_emit() may fault on the user buffer — call unlocked */
		mutex_unlock(&kernfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&kernfs_mutex);
	}
	mutex_unlock(&kernfs_mutex);
	file->private_data = NULL;
	ctx->pos = INT_MAX;	/* EOF marker, rejected by kernfs_dir_pos() */
	return 0;
}
1054 | |||
1055 | static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset, | ||
1056 | int whence) | ||
1057 | { | ||
1058 | struct inode *inode = file_inode(file); | ||
1059 | loff_t ret; | ||
1060 | |||
1061 | mutex_lock(&inode->i_mutex); | ||
1062 | ret = generic_file_llseek(file, offset, whence); | ||
1063 | mutex_unlock(&inode->i_mutex); | ||
1064 | |||
1065 | return ret; | ||
1066 | } | ||
1067 | |||
/* file operations for kernfs directories */
const struct file_operations kernfs_dir_fops = {
	.read = generic_read_dir,
	.iterate = kernfs_fop_readdir,
	.release = kernfs_dir_fop_release,
	.llseek = kernfs_dir_fop_llseek,
};
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c new file mode 100644 index 000000000000..dbf397bfdff2 --- /dev/null +++ b/fs/kernfs/file.c | |||
@@ -0,0 +1,867 @@ | |||
1 | /* | ||
2 | * fs/kernfs/file.c - kernfs file implementation | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/seq_file.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/poll.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/sched.h> | ||
17 | |||
18 | #include "kernfs-internal.h" | ||
19 | |||
/*
 * There's one kernfs_open_file for each open file and one kernfs_open_node
 * for each kernfs_node with one or more open files.
 *
 * kernfs_node->attr.open points to kernfs_open_node.  attr.open is
 * protected by kernfs_open_node_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * kernfs_open_file.  kernfs_open_files are chained at
 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
 */
static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
	atomic_t		refcnt;
	atomic_t		event;	/* snapshotted into of->event by
					 * kernfs_seq_show() */
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
};
40 | |||
41 | static struct kernfs_open_file *kernfs_of(struct file *file) | ||
42 | { | ||
43 | return ((struct seq_file *)file->private_data)->private; | ||
44 | } | ||
45 | |||
/*
 * Determine the kernfs_ops for the given kernfs_node.  This function must
 * be called while holding an active reference; that is lockdep-asserted
 * for nodes flagged KERNFS_LOCKDEP.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}
56 | |||
/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure.  The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it.  As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed or not only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	/* caller must own an active ref on of->kn; it is dropped here */
	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active(of->kn);
}
88 | |||
/* seq_file ->start: grab the per-open-file mutex and an active ref */
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.  It
	 * stays held across the whole start/stop cycle — including the
	 * ERR_PTR(-ENODEV) return below — and is released only in
	 * kernfs_seq_stop().
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open().  Returns
		 * !NULL if pos is at the beginning; otherwise, NULL.
		 */
		return NULL + !*ppos;
	}
}
117 | |||
/* seq_file ->next: delegate to the node's op or behave like single_open() */
static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}
138 | |||
/*
 * seq_file ->stop: undo kernfs_seq_start().  @v == ERR_PTR(-ENODEV)
 * means start failed to get an active ref, so only the mutex is dropped
 * (see the comment above kernfs_seq_stop_active()).
 */
static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}
147 | |||
148 | static int kernfs_seq_show(struct seq_file *sf, void *v) | ||
149 | { | ||
150 | struct kernfs_open_file *of = sf->private; | ||
151 | |||
152 | of->event = atomic_read(&of->kn->attr.open->event); | ||
153 | |||
154 | return of->kn->attr.ops->seq_show(sf, v); | ||
155 | } | ||
156 | |||
/* seq_file operations used for KERNFS_HAS_SEQ_SHOW files (see
 * kernfs_fop_read()) */
static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};
163 | |||
/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in read(2) call should be passed to the read callback making
 * it difficult to use seq_file.  Implement simplistic custom buffering for
 * bin files.
 *
 * Reads at most one page per call; the ->read op fills a kernel buffer
 * which is then copied out to @user_buf.  Returns the number of bytes
 * read or a negative errno.
 */
static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
				       char __user *user_buf, size_t count,
				       loff_t *ppos)
{
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, *ppos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	/* copy out after dropping the locks; @buf is private to us */
	if (copy_to_user(user_buf, buf, len)) {
		len = -EFAULT;
		goto out_free;
	}

	*ppos += len;

 out_free:
	kfree(buf);
	return len;
}
216 | |||
217 | /** | ||
218 | * kernfs_fop_read - kernfs vfs read callback | ||
219 | * @file: file pointer | ||
220 | * @user_buf: data to write | ||
221 | * @count: number of bytes | ||
222 | * @ppos: starting offset | ||
223 | */ | ||
224 | static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, | ||
225 | size_t count, loff_t *ppos) | ||
226 | { | ||
227 | struct kernfs_open_file *of = kernfs_of(file); | ||
228 | |||
229 | if (of->kn->flags & KERNFS_HAS_SEQ_SHOW) | ||
230 | return seq_read(file, user_buf, count, ppos); | ||
231 | else | ||
232 | return kernfs_file_direct_read(of, user_buf, count, ppos); | ||
233 | } | ||
234 | |||
/**
 * kernfs_fop_write - kernfs vfs write callback
 * @file: file pointer
 * @user_buf: data to write
 * @count: number of bytes
 * @ppos: starting offset
 *
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them.  We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write the entire buffer
 * back.
 */
static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct kernfs_open_file *of = kernfs_of(file);
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	/* +1 for the forced NUL terminator below */
	buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, user_buf, len)) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, *ppos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len > 0)
		*ppos += len;
out_free:
	kfree(buf);
	return len;
}
295 | |||
296 | static void kernfs_vma_open(struct vm_area_struct *vma) | ||
297 | { | ||
298 | struct file *file = vma->vm_file; | ||
299 | struct kernfs_open_file *of = kernfs_of(file); | ||
300 | |||
301 | if (!of->vm_ops) | ||
302 | return; | ||
303 | |||
304 | if (!kernfs_get_active(of->kn)) | ||
305 | return; | ||
306 | |||
307 | if (of->vm_ops->open) | ||
308 | of->vm_ops->open(vma); | ||
309 | |||
310 | kernfs_put_active(of->kn); | ||
311 | } | ||
312 | |||
313 | static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
314 | { | ||
315 | struct file *file = vma->vm_file; | ||
316 | struct kernfs_open_file *of = kernfs_of(file); | ||
317 | int ret; | ||
318 | |||
319 | if (!of->vm_ops) | ||
320 | return VM_FAULT_SIGBUS; | ||
321 | |||
322 | if (!kernfs_get_active(of->kn)) | ||
323 | return VM_FAULT_SIGBUS; | ||
324 | |||
325 | ret = VM_FAULT_SIGBUS; | ||
326 | if (of->vm_ops->fault) | ||
327 | ret = of->vm_ops->fault(vma, vmf); | ||
328 | |||
329 | kernfs_put_active(of->kn); | ||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, | ||
334 | struct vm_fault *vmf) | ||
335 | { | ||
336 | struct file *file = vma->vm_file; | ||
337 | struct kernfs_open_file *of = kernfs_of(file); | ||
338 | int ret; | ||
339 | |||
340 | if (!of->vm_ops) | ||
341 | return VM_FAULT_SIGBUS; | ||
342 | |||
343 | if (!kernfs_get_active(of->kn)) | ||
344 | return VM_FAULT_SIGBUS; | ||
345 | |||
346 | ret = 0; | ||
347 | if (of->vm_ops->page_mkwrite) | ||
348 | ret = of->vm_ops->page_mkwrite(vma, vmf); | ||
349 | else | ||
350 | file_update_time(file); | ||
351 | |||
352 | kernfs_put_active(of->kn); | ||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, | ||
357 | void *buf, int len, int write) | ||
358 | { | ||
359 | struct file *file = vma->vm_file; | ||
360 | struct kernfs_open_file *of = kernfs_of(file); | ||
361 | int ret; | ||
362 | |||
363 | if (!of->vm_ops) | ||
364 | return -EINVAL; | ||
365 | |||
366 | if (!kernfs_get_active(of->kn)) | ||
367 | return -EINVAL; | ||
368 | |||
369 | ret = -EINVAL; | ||
370 | if (of->vm_ops->access) | ||
371 | ret = of->vm_ops->access(vma, addr, buf, len, write); | ||
372 | |||
373 | kernfs_put_active(of->kn); | ||
374 | return ret; | ||
375 | } | ||
376 | |||
#ifdef CONFIG_NUMA
/*
 * NUMA mempolicy hooks below follow the same pattern as the other VM
 * callbacks: take an active reference and forward to the wrapped vm_ops.
 */
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	/* nothing wrapped - accept silently and keep the default policy */
	if (!of->vm_ops)
		return 0;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	kernfs_put_active(of->kn);
	return ret;
}
398 | |||
/*
 * Return the mempolicy for @addr, falling back to the VMA's own policy
 * when there is no wrapped implementation or the node is inactive.
 */
static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!kernfs_get_active(of->kn))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	kernfs_put_active(of->kn);
	return pol;
}
419 | |||
/*
 * Forward page-migration requests to the wrapped vm_ops; treated as a
 * successful no-op when no implementation is available.
 */
static int kernfs_vma_migrate(struct vm_area_struct *vma,
			      const nodemask_t *from, const nodemask_t *to,
			      unsigned long flags)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!kernfs_get_active(of->kn))
		return 0;

	ret = 0;
	if (of->vm_ops->migrate)
		ret = of->vm_ops->migrate(vma, from, to, flags);

	kernfs_put_active(of->kn);
	return ret;
}
#endif
442 | |||
/*
 * vm_operations installed on every kernfs mmap.  Each callback wraps
 * the implementation's own vm_ops (saved in kernfs_open_file->vm_ops)
 * with active-reference protection.  There is deliberately no ->close;
 * see the check in kernfs_fop_mmap().
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open = kernfs_vma_open,
	.fault = kernfs_vma_fault,
	.page_mkwrite = kernfs_vma_page_mkwrite,
	.access = kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy = kernfs_vma_set_policy,
	.get_policy = kernfs_vma_get_policy,
	.migrate = kernfs_vma_migrate,
#endif
};
454 | |||
455 | static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) | ||
456 | { | ||
457 | struct kernfs_open_file *of = kernfs_of(file); | ||
458 | const struct kernfs_ops *ops; | ||
459 | int rc; | ||
460 | |||
461 | /* | ||
462 | * mmap path and of->mutex are prone to triggering spurious lockdep | ||
463 | * warnings and we don't want to add spurious locking dependency | ||
464 | * between the two. Check whether mmap is actually implemented | ||
465 | * without grabbing @of->mutex by testing HAS_MMAP flag. See the | ||
466 | * comment in kernfs_file_open() for more details. | ||
467 | */ | ||
468 | if (!(of->kn->flags & KERNFS_HAS_MMAP)) | ||
469 | return -ENODEV; | ||
470 | |||
471 | mutex_lock(&of->mutex); | ||
472 | |||
473 | rc = -ENODEV; | ||
474 | if (!kernfs_get_active(of->kn)) | ||
475 | goto out_unlock; | ||
476 | |||
477 | ops = kernfs_ops(of->kn); | ||
478 | rc = ops->mmap(of, vma); | ||
479 | |||
480 | /* | ||
481 | * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() | ||
482 | * to satisfy versions of X which crash if the mmap fails: that | ||
483 | * substitutes a new vm_file, and we don't then want bin_vm_ops. | ||
484 | */ | ||
485 | if (vma->vm_file != file) | ||
486 | goto out_put; | ||
487 | |||
488 | rc = -EINVAL; | ||
489 | if (of->mmapped && of->vm_ops != vma->vm_ops) | ||
490 | goto out_put; | ||
491 | |||
492 | /* | ||
493 | * It is not possible to successfully wrap close. | ||
494 | * So error if someone is trying to use close. | ||
495 | */ | ||
496 | rc = -EINVAL; | ||
497 | if (vma->vm_ops && vma->vm_ops->close) | ||
498 | goto out_put; | ||
499 | |||
500 | rc = 0; | ||
501 | of->mmapped = 1; | ||
502 | of->vm_ops = vma->vm_ops; | ||
503 | vma->vm_ops = &kernfs_vm_ops; | ||
504 | out_put: | ||
505 | kernfs_put_active(of->kn); | ||
506 | out_unlock: | ||
507 | mutex_unlock(&of->mutex); | ||
508 | |||
509 | return rc; | ||
510 | } | ||
511 | |||
/**
 * kernfs_get_open_node - get or create kernfs_open_node
 * @kn: target kernfs_node
 * @of: kernfs_open_file for this instance of open
 *
 * If @kn->attr.open exists, increment its reference count; otherwise,
 * create one.  @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on, *new_on = NULL;

 retry:
	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irq(&kernfs_open_node_lock);

	/* install the node speculatively allocated on the previous round */
	if (!kn->attr.open && new_on) {
		kn->attr.open = new_on;
		new_on = NULL;
	}

	on = kn->attr.open;
	if (on) {
		atomic_inc(&on->refcnt);
		list_add_tail(&of->list, &on->files);
	}

	spin_unlock_irq(&kernfs_open_node_lock);
	mutex_unlock(&kernfs_open_file_mutex);

	if (on) {
		/* someone else may have won the race; drop our spare copy */
		kfree(new_on);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
	if (!new_on)
		return -ENOMEM;

	/* refcnt starts at 0; the install path above takes the first ref */
	atomic_set(&new_on->refcnt, 0);
	atomic_set(&new_on->event, 1);
	init_waitqueue_head(&new_on->poll);
	INIT_LIST_HEAD(&new_on->files);
	goto retry;
}
565 | |||
/**
 * kernfs_put_open_node - put kernfs_open_node
 * @kn: target kernfs_node
 * @of: associated kernfs_open_file, or NULL (see kernfs_unmap_bin_file())
 *
 * Put @kn->attr.open and unlink @of from the files list.  If
 * reference count reaches zero, disassociate and free it.
 *
 * LOCKING:
 * None.
 */
static void kernfs_put_open_node(struct kernfs_node *kn,
				 struct kernfs_open_file *of)
{
	struct kernfs_open_node *on = kn->attr.open;
	unsigned long flags;

	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irqsave(&kernfs_open_node_lock, flags);

	if (of)
		list_del(&of->list);

	/* keep @on non-NULL only when this was the last reference */
	if (atomic_dec_and_test(&on->refcnt))
		kn->attr.open = NULL;
	else
		on = NULL;

	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
	mutex_unlock(&kernfs_open_file_mutex);

	/* kfree(NULL) is a no-op when other openers still hold references */
	kfree(on);
}
599 | |||
/*
 * Open handler for kernfs regular files.  Validates permissions against
 * the implemented ops, allocates the per-open kernfs_open_file, sets up
 * the seq_file and attaches to the node's kernfs_open_node.
 */
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	/* mmap implies both readable and writable access */
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* check perms and supported operations */
	if ((file->f_mode & FMODE_WRITE) &&
	    (!(inode->i_mode & S_IWUGO) || !has_write))
		goto err_out;

	if ((file->f_mode & FMODE_READ) &&
	    (!(inode->i_mode & S_IRUGO) || !has_read))
		goto err_out;

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * Both paths of the branch look the same.  They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	/* stash @of where kernfs_of() can find it */
	((struct seq_file *)file->private_data)->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_close;

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

err_close:
	seq_release(inode, file);
err_free:
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}
690 | |||
691 | static int kernfs_fop_release(struct inode *inode, struct file *filp) | ||
692 | { | ||
693 | struct kernfs_node *kn = filp->f_path.dentry->d_fsdata; | ||
694 | struct kernfs_open_file *of = kernfs_of(filp); | ||
695 | |||
696 | kernfs_put_open_node(kn, of); | ||
697 | seq_release(inode, filp); | ||
698 | kfree(of); | ||
699 | |||
700 | return 0; | ||
701 | } | ||
702 | |||
/*
 * kernfs_unmap_bin_file - zap userland mappings of a mmap'able node
 * @kn: node of interest
 *
 * Walks every open file of @kn and unmaps its pages so no mapping can
 * outlive the node.  The open_node is temporarily pinned with an extra
 * reference so the files list stays valid while it is walked.
 */
void kernfs_unmap_bin_file(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;

	/* only nodes with mmap support can have mappings */
	if (!(kn->flags & KERNFS_HAS_MMAP))
		return;

	spin_lock_irq(&kernfs_open_node_lock);
	on = kn->attr.open;
	if (on)
		atomic_inc(&on->refcnt);
	spin_unlock_irq(&kernfs_open_node_lock);
	if (!on)
		return;

	/* the mutex keeps the files list stable while we walk it */
	mutex_lock(&kernfs_open_file_mutex);
	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);
		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
	}
	mutex_unlock(&kernfs_open_file_mutex);

	/* drop the temporary reference taken above */
	kernfs_put_open_node(kn, NULL);
}
728 | |||
/*
 * Kernfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return POLLERR|POLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (Neither 'poll' nor 'select' return
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
	struct kernfs_open_node *on = kn->attr.open;

	/* need parent for the kobj, grab both */
	if (!kernfs_get_active(kn))
		goto trigger;

	poll_wait(filp, &on->poll, wait);

	kernfs_put_active(kn);

	/* kernfs_notify() bumped on->event since @of last snapshotted it */
	if (of->event != atomic_read(&on->event))
		goto trigger;

	return DEFAULT_POLLMASK;

 trigger:
	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}
765 | |||
/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up.  Uses an irq-safe
 * spinlock, so it can be called from any context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	unsigned long flags;

	spin_lock_irqsave(&kernfs_open_node_lock, flags);

	/* only regular files are pollable; warn on and ignore other types */
	if (!WARN_ON(kernfs_type(kn) != KERNFS_FILE)) {
		on = kn->attr.open;
		if (on) {
			/* bump the event counter and wake all pollers */
			atomic_inc(&on->event);
			wake_up_interruptible(&on->poll);
		}
	}

	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
790 | |||
/* file_operations shared by all kernfs-backed regular files */
const struct file_operations kernfs_file_fops = {
	.read = kernfs_fop_read,
	.write = kernfs_fop_write,
	.llseek = generic_file_llseek,
	.mmap = kernfs_fop_mmap,
	.open = kernfs_fop_open,
	.release = kernfs_fop_release,
	.poll = kernfs_fop_poll,
};
800 | |||
/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @name_is_static: don't copy file name
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const void *ns,
					 bool name_is_static,
					 struct lock_class_key *key)
{
	struct kernfs_addrm_cxt acxt;
	struct kernfs_node *kn;
	unsigned flags;
	int rc;

	flags = KERNFS_FILE;
	if (name_is_static)
		flags |= KERNFS_STATIC_NAME;

	/* regular file node; only the permission bits of @mode are used */
	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "s_active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;

	/* link the new node into the hierarchy */
	kernfs_addrm_start(&acxt);
	rc = kernfs_add_one(&acxt, kn);
	kernfs_addrm_finish(&acxt);

	if (rc) {
		/* could not be added; drop the reference from creation */
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c new file mode 100644 index 000000000000..e55126f85bd2 --- /dev/null +++ b/fs/kernfs/inode.c | |||
@@ -0,0 +1,377 @@ | |||
1 | /* | ||
2 | * fs/kernfs/inode.c - kernfs inode implementation | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/backing-dev.h> | ||
13 | #include <linux/capability.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/xattr.h> | ||
17 | #include <linux/security.h> | ||
18 | |||
19 | #include "kernfs-internal.h" | ||
20 | |||
/* kernfs files are plain pagecache-backed files; use the libfs helpers */
static const struct address_space_operations kernfs_aops = {
	.readpage = simple_readpage,
	.write_begin = simple_write_begin,
	.write_end = simple_write_end,
};
26 | |||
/* no real device behind kernfs: no readahead, no writeback accounting */
static struct backing_dev_info kernfs_bdi = {
	.name		= "kernfs",
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
32 | |||
/*
 * Default inode_operations; kernfs_init_inode() overrides i_op for
 * directories and symlinks with their type-specific tables.
 */
static const struct inode_operations kernfs_iops = {
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,
};
42 | |||
43 | void __init kernfs_inode_init(void) | ||
44 | { | ||
45 | if (bdi_init(&kernfs_bdi)) | ||
46 | panic("failed to init kernfs_bdi"); | ||
47 | } | ||
48 | |||
/*
 * Return @kn's kernfs_iattrs, allocating and filling it with default
 * root-owned attributes on first use.  Returns NULL on allocation
 * failure.
 *
 * NOTE(review): kn->iattr is tested and assigned here without any lock,
 * yet the xattr paths in this file call this before taking
 * kernfs_mutex - two first-time callers could race and leak one
 * allocation.  Confirm how callers are expected to serialize.
 */
static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
{
	struct iattr *iattrs;

	if (kn->iattr)
		return kn->iattr;

	kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
	if (!kn->iattr)
		return NULL;
	iattrs = &kn->iattr->ia_iattr;

	/* assign default attributes */
	iattrs->ia_mode = kn->mode;
	iattrs->ia_uid = GLOBAL_ROOT_UID;
	iattrs->ia_gid = GLOBAL_ROOT_GID;
	iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;

	simple_xattrs_init(&kn->iattr->xattrs);

	return kn->iattr;
}
71 | |||
/*
 * Apply the valid fields of @iattr to @kn's persistent attribute copy,
 * allocating it on first use.  ATTR_SIZE is not handled here.  Both
 * callers in this file hold kernfs_mutex around this.
 */
static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
{
	struct kernfs_iattrs *attrs;
	struct iattr *iattrs;
	unsigned int ia_valid = iattr->ia_valid;

	attrs = kernfs_iattrs(kn);
	if (!attrs)
		return -ENOMEM;

	iattrs = &attrs->ia_iattr;

	if (ia_valid & ATTR_UID)
		iattrs->ia_uid = iattr->ia_uid;
	if (ia_valid & ATTR_GID)
		iattrs->ia_gid = iattr->ia_gid;
	if (ia_valid & ATTR_ATIME)
		iattrs->ia_atime = iattr->ia_atime;
	if (ia_valid & ATTR_MTIME)
		iattrs->ia_mtime = iattr->ia_mtime;
	if (ia_valid & ATTR_CTIME)
		iattrs->ia_ctime = iattr->ia_ctime;
	if (ia_valid & ATTR_MODE) {
		umode_t mode = iattr->ia_mode;
		/* the mode is mirrored in kn->mode for lock-free reads */
		iattrs->ia_mode = kn->mode = mode;
	}
	return 0;
}
100 | |||
/**
 * kernfs_setattr - set iattr on a node
 * @kn: target node
 * @iattr: iattr to set
 *
 * Wrapper around __kernfs_setattr() that provides the required
 * kernfs_mutex protection.
 *
 * Returns 0 on success, -errno on failure.
 */
int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
{
	int ret;

	mutex_lock(&kernfs_mutex);
	ret = __kernfs_setattr(kn, iattr);
	mutex_unlock(&kernfs_mutex);
	return ret;
}
117 | |||
/*
 * ->setattr: validate the change, record it in the node's persistent
 * attributes and mirror it into the VFS inode.
 */
int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	struct kernfs_node *kn = dentry->d_fsdata;
	int error;

	if (!kn)
		return -EINVAL;

	mutex_lock(&kernfs_mutex);
	error = inode_change_ok(inode, iattr);
	if (error)
		goto out;

	error = __kernfs_setattr(kn, iattr);
	if (error)
		goto out;

	/* this ignores size changes */
	setattr_copy(inode, iattr);

out:
	mutex_unlock(&kernfs_mutex);
	return error;
}
143 | |||
/*
 * Swap in new security data for @kn and return the previous
 * secdata/length through @secdata/@secdata_len, so the caller can
 * release the old secctx after dropping kernfs_mutex.
 */
static int kernfs_node_setsecdata(struct kernfs_node *kn, void **secdata,
				  u32 *secdata_len)
{
	struct kernfs_iattrs *attrs;
	void *old_secdata;
	size_t old_secdata_len;

	attrs = kernfs_iattrs(kn);
	if (!attrs)
		return -ENOMEM;

	old_secdata = attrs->ia_secdata;
	old_secdata_len = attrs->ia_secdata_len;

	attrs->ia_secdata = *secdata;
	attrs->ia_secdata_len = *secdata_len;

	*secdata = old_secdata;
	*secdata_len = old_secdata_len;
	return 0;
}
165 | |||
/*
 * ->setxattr: only "security.*" and "trusted.*" are supported.
 * Security labels go through the LSM and the resulting secctx is
 * cached in the node; trusted xattrs are stored in the node's
 * simple_xattrs.  Everything else is rejected with -EINVAL.
 */
int kernfs_iop_setxattr(struct dentry *dentry, const char *name,
			const void *value, size_t size, int flags)
{
	struct kernfs_node *kn = dentry->d_fsdata;
	struct kernfs_iattrs *attrs;
	void *secdata;
	int error;
	u32 secdata_len = 0;

	attrs = kernfs_iattrs(kn);
	if (!attrs)
		return -ENOMEM;

	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
		error = security_inode_setsecurity(dentry->d_inode, suffix,
						   value, size, flags);
		if (error)
			return error;
		error = security_inode_getsecctx(dentry->d_inode,
						 &secdata, &secdata_len);
		if (error)
			return error;

		mutex_lock(&kernfs_mutex);
		error = kernfs_node_setsecdata(kn, &secdata, &secdata_len);
		mutex_unlock(&kernfs_mutex);

		/* @secdata now holds the old secctx; release it if present */
		if (secdata)
			security_release_secctx(secdata, secdata_len);
		return error;
	} else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
		return simple_xattr_set(&attrs->xattrs, name, value, size,
					flags);
	}

	return -EINVAL;
}
204 | |||
205 | int kernfs_iop_removexattr(struct dentry *dentry, const char *name) | ||
206 | { | ||
207 | struct kernfs_node *kn = dentry->d_fsdata; | ||
208 | struct kernfs_iattrs *attrs; | ||
209 | |||
210 | attrs = kernfs_iattrs(kn); | ||
211 | if (!attrs) | ||
212 | return -ENOMEM; | ||
213 | |||
214 | return simple_xattr_remove(&attrs->xattrs, name); | ||
215 | } | ||
216 | |||
217 | ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, | ||
218 | size_t size) | ||
219 | { | ||
220 | struct kernfs_node *kn = dentry->d_fsdata; | ||
221 | struct kernfs_iattrs *attrs; | ||
222 | |||
223 | attrs = kernfs_iattrs(kn); | ||
224 | if (!attrs) | ||
225 | return -ENOMEM; | ||
226 | |||
227 | return simple_xattr_get(&attrs->xattrs, name, buf, size); | ||
228 | } | ||
229 | |||
230 | ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) | ||
231 | { | ||
232 | struct kernfs_node *kn = dentry->d_fsdata; | ||
233 | struct kernfs_iattrs *attrs; | ||
234 | |||
235 | attrs = kernfs_iattrs(kn); | ||
236 | if (!attrs) | ||
237 | return -ENOMEM; | ||
238 | |||
239 | return simple_xattr_list(&attrs->xattrs, buf, size); | ||
240 | } | ||
241 | |||
/* reset @inode's mode and timestamps to freshly-created defaults */
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
{
	inode->i_mode = mode;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
247 | |||
/* copy ownership and timestamps from a persistent iattr into @inode */
static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
{
	inode->i_uid = iattr->ia_uid;
	inode->i_gid = iattr->ia_gid;
	inode->i_atime = iattr->ia_atime;
	inode->i_mtime = iattr->ia_mtime;
	inode->i_ctime = iattr->ia_ctime;
}
256 | |||
/*
 * Refresh @inode from the authoritative attributes kept in @kn.
 * All callers in this file serialize on kernfs_mutex.
 */
static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
{
	struct kernfs_iattrs *attrs = kn->iattr;

	inode->i_mode = kn->mode;
	if (attrs) {
		/*
		 * kernfs_node has non-default attributes get them from
		 * persistent copy in kernfs_node.
		 */
		set_inode_attr(inode, &attrs->ia_iattr);
		security_inode_notifysecctx(inode, attrs->ia_secdata,
					    attrs->ia_secdata_len);
	}

	/* directories: one link per subdir plus "." and ".." */
	if (kernfs_type(kn) == KERNFS_DIR)
		set_nlink(inode, kn->dir.subdirs + 2);
}
275 | |||
/*
 * ->getattr: sync the VFS inode with the node's attributes, then let
 * the generic helper fill @stat from the inode.
 */
int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
		       struct kstat *stat)
{
	struct kernfs_node *kn = dentry->d_fsdata;
	struct inode *inode = dentry->d_inode;

	mutex_lock(&kernfs_mutex);
	kernfs_refresh_inode(kn, inode);
	mutex_unlock(&kernfs_mutex);

	generic_fillattr(inode, stat);
	return 0;
}
289 | |||
/*
 * One-time setup of a freshly allocated (I_NEW) inode for @kn: pin the
 * node, install common ops and attributes, then type-specific ops.
 * Unlocks the new inode before returning.
 */
static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
{
	/* inode holds a ref on @kn; dropped in kernfs_evict_inode() */
	kernfs_get(kn);
	inode->i_private = kn;
	inode->i_mapping->a_ops = &kernfs_aops;
	inode->i_mapping->backing_dev_info = &kernfs_bdi;
	inode->i_op = &kernfs_iops;

	set_default_inode_attr(inode, kn->mode);
	kernfs_refresh_inode(kn, inode);

	/* initialize inode according to type */
	switch (kernfs_type(kn)) {
	case KERNFS_DIR:
		inode->i_op = &kernfs_dir_iops;
		inode->i_fop = &kernfs_dir_fops;
		break;
	case KERNFS_FILE:
		inode->i_size = kn->attr.size;
		inode->i_fop = &kernfs_file_fops;
		break;
	case KERNFS_LINK:
		inode->i_op = &kernfs_symlink_iops;
		break;
	default:
		/* a kernfs_node is always one of the three types above */
		BUG();
	}

	unlock_new_inode(inode);
}
320 | |||
/**
 * kernfs_get_inode - get inode for kernfs_node
 * @sb: super block
 * @kn: kernfs_node to allocate inode for
 *
 * Get inode for @kn.  If such inode doesn't exist, a new inode is
 * allocated and basics are initialized.  A new inode is unlocked by
 * kernfs_init_inode() before it is returned (the old comment claiming
 * the inode is returned locked was stale).
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Pointer to allocated inode on success, NULL on failure.
 */
struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
{
	struct inode *inode;

	inode = iget_locked(sb, kn->ino);
	/* only a freshly allocated inode needs initialization */
	if (inode && (inode->i_state & I_NEW))
		kernfs_init_inode(kn, inode);

	return inode;
}
346 | |||
/*
 * The kernfs_node serves as both an inode and a directory entry for
 * kernfs.  To prevent the kernfs inode numbers from being freed
 * prematurely we take a reference to kernfs_node from the kernfs inode.  A
 * super_operations.evict_inode() implementation is needed to drop that
 * reference upon inode destruction.
 */
void kernfs_evict_inode(struct inode *inode)
{
	struct kernfs_node *kn = inode->i_private;

	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	/* drop the ref taken in kernfs_init_inode() */
	kernfs_put(kn);
}
362 | |||
/*
 * ->permission: refresh the inode from the node's attributes before
 * running the generic permission check.
 */
int kernfs_iop_permission(struct inode *inode, int mask)
{
	struct kernfs_node *kn;

	/* refreshing sleeps on kernfs_mutex; bail out of RCU-walk mode */
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	kn = inode->i_private;

	mutex_lock(&kernfs_mutex);
	kernfs_refresh_inode(kn, inode);
	mutex_unlock(&kernfs_mutex);

	return generic_permission(inode, mask);
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h new file mode 100644 index 000000000000..eb536b76374a --- /dev/null +++ b/fs/kernfs/kernfs-internal.h | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * fs/kernfs/kernfs-internal.h - kernfs internal header file | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
#ifndef __KERNFS_INTERNAL_H
#define __KERNFS_INTERNAL_H

#include <linux/lockdep.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/xattr.h>

#include <linux/kernfs.h>

/*
 * Lazily allocated persistent attribute state of a kernfs_node:
 * chmod/chown results, cached security context and extended attributes.
 */
struct kernfs_iattrs {
	struct iattr ia_iattr;
	void *ia_secdata;
	u32 ia_secdata_len;

	struct simple_xattrs xattrs;
};

/* bias presumably applied to a node's active refcount on deactivation;
 * the users live in dir.c which is not part of this header */
#define KN_DEACTIVATED_BIAS INT_MIN

/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */

/**
 * kernfs_root - find out the kernfs_root a kernfs_node belongs to
 * @kn: kernfs_node of interest
 *
 * Return the kernfs_root @kn belongs to.
 */
static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
{
	/* if parent exists, it's always a dir; otherwise, @sd is a dir */
	if (kn->parent)
		kn = kn->parent;
	return kn->dir.root;
}

/*
 * Context structure to be used while adding/removing nodes.
 */
struct kernfs_addrm_cxt {
	struct kernfs_node *removed;
};

/*
 * mount.c
 */

/* per-superblock info; lives in sb->s_fs_info */
struct kernfs_super_info {
	/*
	 * The root associated with this super_block.  Each super_block is
	 * identified by the root and ns it's associated with.
	 */
	struct kernfs_root *root;

	/*
	 * Each sb is associated with one namespace tag, currently the
	 * network namespace of the task which mounted this kernfs
	 * instance.  If multiple tags become necessary, make the following
	 * an array and compare kernfs_node tag against every entry.
	 */
	const void *ns;
};
#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))

extern struct kmem_cache *kernfs_node_cache;

/*
 * inode.c
 */
struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
void kernfs_evict_inode(struct inode *inode);
int kernfs_iop_permission(struct inode *inode, int mask);
int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
		       struct kstat *stat);
int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value,
			size_t size, int flags);
int kernfs_iop_removexattr(struct dentry *dentry, const char *name);
ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
			    size_t size);
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
void kernfs_inode_init(void);

/*
 * dir.c
 */
extern struct mutex kernfs_mutex;
extern const struct dentry_operations kernfs_dops;
extern const struct file_operations kernfs_dir_fops;
extern const struct inode_operations kernfs_dir_iops;

struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
void kernfs_put_active(struct kernfs_node *kn);
void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt);
int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn);
void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
				    const char *name, umode_t mode,
				    unsigned flags);

/*
 * file.c
 */
extern const struct file_operations kernfs_file_fops;

void kernfs_unmap_bin_file(struct kernfs_node *kn);

/*
 * symlink.c
 */
extern const struct inode_operations kernfs_symlink_iops;

#endif	/* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c new file mode 100644 index 000000000000..0d6ce895a9ee --- /dev/null +++ b/fs/kernfs/mount.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * fs/kernfs/mount.c - kernfs mount implementation | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/mount.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/magic.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/pagemap.h> | ||
17 | |||
18 | #include "kernfs-internal.h" | ||
19 | |||
20 | struct kmem_cache *kernfs_node_cache; | ||
21 | |||
22 | static const struct super_operations kernfs_sops = { | ||
23 | .statfs = simple_statfs, | ||
24 | .drop_inode = generic_delete_inode, | ||
25 | .evict_inode = kernfs_evict_inode, | ||
26 | }; | ||
27 | |||
28 | static int kernfs_fill_super(struct super_block *sb) | ||
29 | { | ||
30 | struct kernfs_super_info *info = kernfs_info(sb); | ||
31 | struct inode *inode; | ||
32 | struct dentry *root; | ||
33 | |||
34 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
35 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
36 | sb->s_magic = SYSFS_MAGIC; | ||
37 | sb->s_op = &kernfs_sops; | ||
38 | sb->s_time_gran = 1; | ||
39 | |||
40 | /* get root inode, initialize and unlock it */ | ||
41 | mutex_lock(&kernfs_mutex); | ||
42 | inode = kernfs_get_inode(sb, info->root->kn); | ||
43 | mutex_unlock(&kernfs_mutex); | ||
44 | if (!inode) { | ||
45 | pr_debug("kernfs: could not get root inode\n"); | ||
46 | return -ENOMEM; | ||
47 | } | ||
48 | |||
49 | /* instantiate and link root dentry */ | ||
50 | root = d_make_root(inode); | ||
51 | if (!root) { | ||
52 | pr_debug("%s: could not get root dentry!\n", __func__); | ||
53 | return -ENOMEM; | ||
54 | } | ||
55 | kernfs_get(info->root->kn); | ||
56 | root->d_fsdata = info->root->kn; | ||
57 | sb->s_root = root; | ||
58 | sb->s_d_op = &kernfs_dops; | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static int kernfs_test_super(struct super_block *sb, void *data) | ||
63 | { | ||
64 | struct kernfs_super_info *sb_info = kernfs_info(sb); | ||
65 | struct kernfs_super_info *info = data; | ||
66 | |||
67 | return sb_info->root == info->root && sb_info->ns == info->ns; | ||
68 | } | ||
69 | |||
70 | static int kernfs_set_super(struct super_block *sb, void *data) | ||
71 | { | ||
72 | int error; | ||
73 | error = set_anon_super(sb, data); | ||
74 | if (!error) | ||
75 | sb->s_fs_info = data; | ||
76 | return error; | ||
77 | } | ||
78 | |||
79 | /** | ||
80 | * kernfs_super_ns - determine the namespace tag of a kernfs super_block | ||
81 | * @sb: super_block of interest | ||
82 | * | ||
83 | * Return the namespace tag associated with kernfs super_block @sb. | ||
84 | */ | ||
85 | const void *kernfs_super_ns(struct super_block *sb) | ||
86 | { | ||
87 | struct kernfs_super_info *info = kernfs_info(sb); | ||
88 | |||
89 | return info->ns; | ||
90 | } | ||
91 | |||
92 | /** | ||
93 | * kernfs_mount_ns - kernfs mount helper | ||
94 | * @fs_type: file_system_type of the fs being mounted | ||
95 | * @flags: mount flags specified for the mount | ||
96 | * @root: kernfs_root of the hierarchy being mounted | ||
97 | * @ns: optional namespace tag of the mount | ||
98 | * | ||
99 | * This is to be called from each kernfs user's file_system_type->mount() | ||
100 | * implementation, which should pass through the specified @fs_type and | ||
101 | * @flags, and specify the hierarchy and namespace tag to mount via @root | ||
102 | * and @ns, respectively. | ||
103 | * | ||
104 | * The return value can be passed to the vfs layer verbatim. | ||
105 | */ | ||
106 | struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, | ||
107 | struct kernfs_root *root, const void *ns) | ||
108 | { | ||
109 | struct super_block *sb; | ||
110 | struct kernfs_super_info *info; | ||
111 | int error; | ||
112 | |||
113 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
114 | if (!info) | ||
115 | return ERR_PTR(-ENOMEM); | ||
116 | |||
117 | info->root = root; | ||
118 | info->ns = ns; | ||
119 | |||
120 | sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info); | ||
121 | if (IS_ERR(sb) || sb->s_fs_info != info) | ||
122 | kfree(info); | ||
123 | if (IS_ERR(sb)) | ||
124 | return ERR_CAST(sb); | ||
125 | if (!sb->s_root) { | ||
126 | error = kernfs_fill_super(sb); | ||
127 | if (error) { | ||
128 | deactivate_locked_super(sb); | ||
129 | return ERR_PTR(error); | ||
130 | } | ||
131 | sb->s_flags |= MS_ACTIVE; | ||
132 | } | ||
133 | |||
134 | return dget(sb->s_root); | ||
135 | } | ||
136 | |||
137 | /** | ||
138 | * kernfs_kill_sb - kill_sb for kernfs | ||
139 | * @sb: super_block being killed | ||
140 | * | ||
141 | * This can be used directly for file_system_type->kill_sb(). If a kernfs | ||
142 | * user needs extra cleanup, it can implement its own kill_sb() and call | ||
143 | * this function at the end. | ||
144 | */ | ||
145 | void kernfs_kill_sb(struct super_block *sb) | ||
146 | { | ||
147 | struct kernfs_super_info *info = kernfs_info(sb); | ||
148 | struct kernfs_node *root_kn = sb->s_root->d_fsdata; | ||
149 | |||
150 | /* | ||
151 | * Remove the superblock from fs_supers/s_instances | ||
152 | * so we can't find it, before freeing kernfs_super_info. | ||
153 | */ | ||
154 | kill_anon_super(sb); | ||
155 | kfree(info); | ||
156 | kernfs_put(root_kn); | ||
157 | } | ||
158 | |||
159 | void __init kernfs_init(void) | ||
160 | { | ||
161 | kernfs_node_cache = kmem_cache_create("kernfs_node_cache", | ||
162 | sizeof(struct kernfs_node), | ||
163 | 0, SLAB_PANIC, NULL); | ||
164 | kernfs_inode_init(); | ||
165 | } | ||
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c new file mode 100644 index 000000000000..4d457055acb9 --- /dev/null +++ b/fs/kernfs/symlink.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * fs/kernfs/symlink.c - kernfs symlink implementation | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | */ | ||
10 | |||
11 | #include <linux/fs.h> | ||
12 | #include <linux/gfp.h> | ||
13 | #include <linux/namei.h> | ||
14 | |||
15 | #include "kernfs-internal.h" | ||
16 | |||
17 | /** | ||
18 | * kernfs_create_link - create a symlink | ||
19 | * @parent: directory to create the symlink in | ||
20 | * @name: name of the symlink | ||
21 | * @target: target node for the symlink to point to | ||
22 | * | ||
23 | * Returns the created node on success, ERR_PTR() value on error. | ||
24 | */ | ||
25 | struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, | ||
26 | const char *name, | ||
27 | struct kernfs_node *target) | ||
28 | { | ||
29 | struct kernfs_node *kn; | ||
30 | struct kernfs_addrm_cxt acxt; | ||
31 | int error; | ||
32 | |||
33 | kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); | ||
34 | if (!kn) | ||
35 | return ERR_PTR(-ENOMEM); | ||
36 | |||
37 | if (kernfs_ns_enabled(parent)) | ||
38 | kn->ns = target->ns; | ||
39 | kn->symlink.target_kn = target; | ||
40 | kernfs_get(target); /* ref owned by symlink */ | ||
41 | |||
42 | kernfs_addrm_start(&acxt); | ||
43 | error = kernfs_add_one(&acxt, kn); | ||
44 | kernfs_addrm_finish(&acxt); | ||
45 | |||
46 | if (!error) | ||
47 | return kn; | ||
48 | |||
49 | kernfs_put(kn); | ||
50 | return ERR_PTR(error); | ||
51 | } | ||
52 | |||
53 | static int kernfs_get_target_path(struct kernfs_node *parent, | ||
54 | struct kernfs_node *target, char *path) | ||
55 | { | ||
56 | struct kernfs_node *base, *kn; | ||
57 | char *s = path; | ||
58 | int len = 0; | ||
59 | |||
60 | /* go up to the root, stop at the base */ | ||
61 | base = parent; | ||
62 | while (base->parent) { | ||
63 | kn = target->parent; | ||
64 | while (kn->parent && base != kn) | ||
65 | kn = kn->parent; | ||
66 | |||
67 | if (base == kn) | ||
68 | break; | ||
69 | |||
70 | strcpy(s, "../"); | ||
71 | s += 3; | ||
72 | base = base->parent; | ||
73 | } | ||
74 | |||
75 | /* determine end of target string for reverse fillup */ | ||
76 | kn = target; | ||
77 | while (kn->parent && kn != base) { | ||
78 | len += strlen(kn->name) + 1; | ||
79 | kn = kn->parent; | ||
80 | } | ||
81 | |||
82 | /* check limits */ | ||
83 | if (len < 2) | ||
84 | return -EINVAL; | ||
85 | len--; | ||
86 | if ((s - path) + len > PATH_MAX) | ||
87 | return -ENAMETOOLONG; | ||
88 | |||
89 | /* reverse fillup of target string from target to base */ | ||
90 | kn = target; | ||
91 | while (kn->parent && kn != base) { | ||
92 | int slen = strlen(kn->name); | ||
93 | |||
94 | len -= slen; | ||
95 | strncpy(s + len, kn->name, slen); | ||
96 | if (len) | ||
97 | s[--len] = '/'; | ||
98 | |||
99 | kn = kn->parent; | ||
100 | } | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | static int kernfs_getlink(struct dentry *dentry, char *path) | ||
106 | { | ||
107 | struct kernfs_node *kn = dentry->d_fsdata; | ||
108 | struct kernfs_node *parent = kn->parent; | ||
109 | struct kernfs_node *target = kn->symlink.target_kn; | ||
110 | int error; | ||
111 | |||
112 | mutex_lock(&kernfs_mutex); | ||
113 | error = kernfs_get_target_path(parent, target, path); | ||
114 | mutex_unlock(&kernfs_mutex); | ||
115 | |||
116 | return error; | ||
117 | } | ||
118 | |||
119 | static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
120 | { | ||
121 | int error = -ENOMEM; | ||
122 | unsigned long page = get_zeroed_page(GFP_KERNEL); | ||
123 | if (page) { | ||
124 | error = kernfs_getlink(dentry, (char *) page); | ||
125 | if (error < 0) | ||
126 | free_page((unsigned long)page); | ||
127 | } | ||
128 | nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); | ||
129 | return NULL; | ||
130 | } | ||
131 | |||
132 | static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd, | ||
133 | void *cookie) | ||
134 | { | ||
135 | char *page = nd_get_link(nd); | ||
136 | if (!IS_ERR(page)) | ||
137 | free_page((unsigned long)page); | ||
138 | } | ||
139 | |||
140 | const struct inode_operations kernfs_symlink_iops = { | ||
141 | .setxattr = kernfs_iop_setxattr, | ||
142 | .removexattr = kernfs_iop_removexattr, | ||
143 | .getxattr = kernfs_iop_getxattr, | ||
144 | .listxattr = kernfs_iop_listxattr, | ||
145 | .readlink = generic_readlink, | ||
146 | .follow_link = kernfs_iop_follow_link, | ||
147 | .put_link = kernfs_iop_put_link, | ||
148 | .setattr = kernfs_iop_setattr, | ||
149 | .getattr = kernfs_iop_getattr, | ||
150 | .permission = kernfs_iop_permission, | ||
151 | }; | ||
diff --git a/fs/namespace.c b/fs/namespace.c index ac2ce8a766e1..22e536705c45 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -2790,6 +2790,8 @@ void __init mnt_init(void) | |||
2790 | for (u = 0; u < HASH_SIZE; u++) | 2790 | for (u = 0; u < HASH_SIZE; u++) |
2791 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); | 2791 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); |
2792 | 2792 | ||
2793 | kernfs_init(); | ||
2794 | |||
2793 | err = sysfs_init(); | 2795 | err = sysfs_init(); |
2794 | if (err) | 2796 | if (err) |
2795 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 2797 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
@@ -2886,7 +2888,7 @@ bool fs_fully_visible(struct file_system_type *type) | |||
2886 | struct inode *inode = child->mnt_mountpoint->d_inode; | 2888 | struct inode *inode = child->mnt_mountpoint->d_inode; |
2887 | if (!S_ISDIR(inode->i_mode)) | 2889 | if (!S_ISDIR(inode->i_mode)) |
2888 | goto next; | 2890 | goto next; |
2889 | if (inode->i_nlink != 2) | 2891 | if (inode->i_nlink > 2) |
2890 | goto next; | 2892 | goto next; |
2891 | } | 2893 | } |
2892 | visible = true; | 2894 | visible = true; |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9f6b486b6c01..a1a191634abc 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, | |||
1440 | 1440 | ||
1441 | nilfs_clear_logs(&sci->sc_segbufs); | 1441 | nilfs_clear_logs(&sci->sc_segbufs); |
1442 | 1442 | ||
1443 | err = nilfs_segctor_extend_segments(sci, nilfs, nadd); | ||
1444 | if (unlikely(err)) | ||
1445 | return err; | ||
1446 | |||
1447 | if (sci->sc_stage.flags & NILFS_CF_SUFREED) { | 1443 | if (sci->sc_stage.flags & NILFS_CF_SUFREED) { |
1448 | err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, | 1444 | err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, |
1449 | sci->sc_freesegs, | 1445 | sci->sc_freesegs, |
1450 | sci->sc_nfreesegs, | 1446 | sci->sc_nfreesegs, |
1451 | NULL); | 1447 | NULL); |
1452 | WARN_ON(err); /* do not happen */ | 1448 | WARN_ON(err); /* do not happen */ |
1449 | sci->sc_stage.flags &= ~NILFS_CF_SUFREED; | ||
1453 | } | 1450 | } |
1451 | |||
1452 | err = nilfs_segctor_extend_segments(sci, nilfs, nadd); | ||
1453 | if (unlikely(err)) | ||
1454 | return err; | ||
1455 | |||
1454 | nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); | 1456 | nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); |
1455 | sci->sc_stage = prev_stage; | 1457 | sci->sc_stage = prev_stage; |
1456 | } | 1458 | } |
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 1fedd5f7ccc4..0b9ff4395e6a 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c | |||
@@ -82,20 +82,23 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) | |||
82 | * events. | 82 | * events. |
83 | */ | 83 | */ |
84 | static int dnotify_handle_event(struct fsnotify_group *group, | 84 | static int dnotify_handle_event(struct fsnotify_group *group, |
85 | struct inode *inode, | ||
85 | struct fsnotify_mark *inode_mark, | 86 | struct fsnotify_mark *inode_mark, |
86 | struct fsnotify_mark *vfsmount_mark, | 87 | struct fsnotify_mark *vfsmount_mark, |
87 | struct fsnotify_event *event) | 88 | u32 mask, void *data, int data_type, |
89 | const unsigned char *file_name) | ||
88 | { | 90 | { |
89 | struct dnotify_mark *dn_mark; | 91 | struct dnotify_mark *dn_mark; |
90 | struct inode *to_tell; | ||
91 | struct dnotify_struct *dn; | 92 | struct dnotify_struct *dn; |
92 | struct dnotify_struct **prev; | 93 | struct dnotify_struct **prev; |
93 | struct fown_struct *fown; | 94 | struct fown_struct *fown; |
94 | __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; | 95 | __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; |
95 | 96 | ||
96 | BUG_ON(vfsmount_mark); | 97 | /* not a dir, dnotify doesn't care */ |
98 | if (!S_ISDIR(inode->i_mode)) | ||
99 | return 0; | ||
97 | 100 | ||
98 | to_tell = event->to_tell; | 101 | BUG_ON(vfsmount_mark); |
99 | 102 | ||
100 | dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); | 103 | dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); |
101 | 104 | ||
@@ -122,23 +125,6 @@ static int dnotify_handle_event(struct fsnotify_group *group, | |||
122 | return 0; | 125 | return 0; |
123 | } | 126 | } |
124 | 127 | ||
125 | /* | ||
126 | * Given an inode and mask determine if dnotify would be interested in sending | ||
127 | * userspace notification for that pair. | ||
128 | */ | ||
129 | static bool dnotify_should_send_event(struct fsnotify_group *group, | ||
130 | struct inode *inode, | ||
131 | struct fsnotify_mark *inode_mark, | ||
132 | struct fsnotify_mark *vfsmount_mark, | ||
133 | __u32 mask, void *data, int data_type) | ||
134 | { | ||
135 | /* not a dir, dnotify doesn't care */ | ||
136 | if (!S_ISDIR(inode->i_mode)) | ||
137 | return false; | ||
138 | |||
139 | return true; | ||
140 | } | ||
141 | |||
142 | static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) | 128 | static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) |
143 | { | 129 | { |
144 | struct dnotify_mark *dn_mark = container_of(fsn_mark, | 130 | struct dnotify_mark *dn_mark = container_of(fsn_mark, |
@@ -152,10 +138,6 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) | |||
152 | 138 | ||
153 | static struct fsnotify_ops dnotify_fsnotify_ops = { | 139 | static struct fsnotify_ops dnotify_fsnotify_ops = { |
154 | .handle_event = dnotify_handle_event, | 140 | .handle_event = dnotify_handle_event, |
155 | .should_send_event = dnotify_should_send_event, | ||
156 | .free_group_priv = NULL, | ||
157 | .freeing_mark = NULL, | ||
158 | .free_event_priv = NULL, | ||
159 | }; | 141 | }; |
160 | 142 | ||
161 | /* | 143 | /* |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 0c2f9122b262..58772623f02a 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -9,31 +9,27 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
11 | 11 | ||
12 | static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | 12 | #include "fanotify.h" |
13 | |||
14 | static bool should_merge(struct fsnotify_event *old_fsn, | ||
15 | struct fsnotify_event *new_fsn) | ||
13 | { | 16 | { |
14 | pr_debug("%s: old=%p new=%p\n", __func__, old, new); | 17 | struct fanotify_event_info *old, *new; |
15 | 18 | ||
16 | if (old->to_tell == new->to_tell && | ||
17 | old->data_type == new->data_type && | ||
18 | old->tgid == new->tgid) { | ||
19 | switch (old->data_type) { | ||
20 | case (FSNOTIFY_EVENT_PATH): | ||
21 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 19 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
22 | /* dont merge two permission events */ | 20 | /* dont merge two permission events */ |
23 | if ((old->mask & FAN_ALL_PERM_EVENTS) && | 21 | if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) && |
24 | (new->mask & FAN_ALL_PERM_EVENTS)) | 22 | (new_fsn->mask & FAN_ALL_PERM_EVENTS)) |
25 | return false; | 23 | return false; |
26 | #endif | 24 | #endif |
27 | if ((old->path.mnt == new->path.mnt) && | 25 | pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); |
28 | (old->path.dentry == new->path.dentry)) | 26 | old = FANOTIFY_E(old_fsn); |
29 | return true; | 27 | new = FANOTIFY_E(new_fsn); |
30 | break; | 28 | |
31 | case (FSNOTIFY_EVENT_NONE): | 29 | if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && |
32 | return true; | 30 | old->path.mnt == new->path.mnt && |
33 | default: | 31 | old->path.dentry == new->path.dentry) |
34 | BUG(); | 32 | return true; |
35 | }; | ||
36 | } | ||
37 | return false; | 33 | return false; |
38 | } | 34 | } |
39 | 35 | ||
@@ -41,59 +37,28 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | |||
41 | static struct fsnotify_event *fanotify_merge(struct list_head *list, | 37 | static struct fsnotify_event *fanotify_merge(struct list_head *list, |
42 | struct fsnotify_event *event) | 38 | struct fsnotify_event *event) |
43 | { | 39 | { |
44 | struct fsnotify_event_holder *test_holder; | 40 | struct fsnotify_event *test_event; |
45 | struct fsnotify_event *test_event = NULL; | 41 | bool do_merge = false; |
46 | struct fsnotify_event *new_event; | ||
47 | 42 | ||
48 | pr_debug("%s: list=%p event=%p\n", __func__, list, event); | 43 | pr_debug("%s: list=%p event=%p\n", __func__, list, event); |
49 | 44 | ||
50 | 45 | list_for_each_entry_reverse(test_event, list, list) { | |
51 | list_for_each_entry_reverse(test_holder, list, event_list) { | 46 | if (should_merge(test_event, event)) { |
52 | if (should_merge(test_holder->event, event)) { | 47 | do_merge = true; |
53 | test_event = test_holder->event; | ||
54 | break; | 48 | break; |
55 | } | 49 | } |
56 | } | 50 | } |
57 | 51 | ||
58 | if (!test_event) | 52 | if (!do_merge) |
59 | return NULL; | 53 | return NULL; |
60 | 54 | ||
61 | fsnotify_get_event(test_event); | 55 | test_event->mask |= event->mask; |
62 | 56 | return test_event; | |
63 | /* if they are exactly the same we are done */ | ||
64 | if (test_event->mask == event->mask) | ||
65 | return test_event; | ||
66 | |||
67 | /* | ||
68 | * if the refcnt == 2 this is the only queue | ||
69 | * for this event and so we can update the mask | ||
70 | * in place. | ||
71 | */ | ||
72 | if (atomic_read(&test_event->refcnt) == 2) { | ||
73 | test_event->mask |= event->mask; | ||
74 | return test_event; | ||
75 | } | ||
76 | |||
77 | new_event = fsnotify_clone_event(test_event); | ||
78 | |||
79 | /* done with test_event */ | ||
80 | fsnotify_put_event(test_event); | ||
81 | |||
82 | /* couldn't allocate memory, merge was not possible */ | ||
83 | if (unlikely(!new_event)) | ||
84 | return ERR_PTR(-ENOMEM); | ||
85 | |||
86 | /* build new event and replace it on the list */ | ||
87 | new_event->mask = (test_event->mask | event->mask); | ||
88 | fsnotify_replace_event(test_holder, new_event); | ||
89 | |||
90 | /* we hold a reference on new_event from clone_event */ | ||
91 | return new_event; | ||
92 | } | 57 | } |
93 | 58 | ||
94 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 59 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
95 | static int fanotify_get_response_from_access(struct fsnotify_group *group, | 60 | static int fanotify_get_response_from_access(struct fsnotify_group *group, |
96 | struct fsnotify_event *event) | 61 | struct fanotify_event_info *event) |
97 | { | 62 | { |
98 | int ret; | 63 | int ret; |
99 | 64 | ||
@@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, | |||
106 | return 0; | 71 | return 0; |
107 | 72 | ||
108 | /* userspace responded, convert to something usable */ | 73 | /* userspace responded, convert to something usable */ |
109 | spin_lock(&event->lock); | ||
110 | switch (event->response) { | 74 | switch (event->response) { |
111 | case FAN_ALLOW: | 75 | case FAN_ALLOW: |
112 | ret = 0; | 76 | ret = 0; |
@@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, | |||
116 | ret = -EPERM; | 80 | ret = -EPERM; |
117 | } | 81 | } |
118 | event->response = 0; | 82 | event->response = 0; |
119 | spin_unlock(&event->lock); | ||
120 | 83 | ||
121 | pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, | 84 | pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, |
122 | group, event, ret); | 85 | group, event, ret); |
@@ -125,58 +88,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, | |||
125 | } | 88 | } |
126 | #endif | 89 | #endif |
127 | 90 | ||
128 | static int fanotify_handle_event(struct fsnotify_group *group, | 91 | static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, |
129 | struct fsnotify_mark *inode_mark, | ||
130 | struct fsnotify_mark *fanotify_mark, | ||
131 | struct fsnotify_event *event) | ||
132 | { | ||
133 | int ret = 0; | ||
134 | struct fsnotify_event *notify_event = NULL; | ||
135 | |||
136 | BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); | ||
137 | BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); | ||
138 | BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
139 | BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
140 | BUILD_BUG_ON(FAN_OPEN != FS_OPEN); | ||
141 | BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); | ||
142 | BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
143 | BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); | ||
144 | BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); | ||
145 | BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); | ||
146 | |||
147 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
148 | |||
149 | notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge); | ||
150 | if (IS_ERR(notify_event)) | ||
151 | return PTR_ERR(notify_event); | ||
152 | |||
153 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
154 | if (event->mask & FAN_ALL_PERM_EVENTS) { | ||
155 | /* if we merged we need to wait on the new event */ | ||
156 | if (notify_event) | ||
157 | event = notify_event; | ||
158 | ret = fanotify_get_response_from_access(group, event); | ||
159 | } | ||
160 | #endif | ||
161 | |||
162 | if (notify_event) | ||
163 | fsnotify_put_event(notify_event); | ||
164 | |||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | static bool fanotify_should_send_event(struct fsnotify_group *group, | ||
169 | struct inode *to_tell, | ||
170 | struct fsnotify_mark *inode_mark, | ||
171 | struct fsnotify_mark *vfsmnt_mark, | 92 | struct fsnotify_mark *vfsmnt_mark, |
172 | __u32 event_mask, void *data, int data_type) | 93 | u32 event_mask, |
94 | void *data, int data_type) | ||
173 | { | 95 | { |
174 | __u32 marks_mask, marks_ignored_mask; | 96 | __u32 marks_mask, marks_ignored_mask; |
175 | struct path *path = data; | 97 | struct path *path = data; |
176 | 98 | ||
177 | pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " | 99 | pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" |
178 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, | 100 | " data_type=%d\n", __func__, inode_mark, vfsmnt_mark, |
179 | inode_mark, vfsmnt_mark, event_mask, data, data_type); | 101 | event_mask, data, data_type); |
180 | 102 | ||
181 | /* if we don't have enough info to send an event to userspace say no */ | 103 | /* if we don't have enough info to send an event to userspace say no */ |
182 | if (data_type != FSNOTIFY_EVENT_PATH) | 104 | if (data_type != FSNOTIFY_EVENT_PATH) |
@@ -217,6 +139,74 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, | |||
217 | return false; | 139 | return false; |
218 | } | 140 | } |
219 | 141 | ||
142 | static int fanotify_handle_event(struct fsnotify_group *group, | ||
143 | struct inode *inode, | ||
144 | struct fsnotify_mark *inode_mark, | ||
145 | struct fsnotify_mark *fanotify_mark, | ||
146 | u32 mask, void *data, int data_type, | ||
147 | const unsigned char *file_name) | ||
148 | { | ||
149 | int ret = 0; | ||
150 | struct fanotify_event_info *event; | ||
151 | struct fsnotify_event *fsn_event; | ||
152 | struct fsnotify_event *notify_fsn_event; | ||
153 | |||
154 | BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); | ||
155 | BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); | ||
156 | BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
157 | BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
158 | BUILD_BUG_ON(FAN_OPEN != FS_OPEN); | ||
159 | BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); | ||
160 | BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
161 | BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); | ||
162 | BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); | ||
163 | BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); | ||
164 | |||
165 | if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data, | ||
166 | data_type)) | ||
167 | return 0; | ||
168 | |||
169 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, | ||
170 | mask); | ||
171 | |||
172 | event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | ||
173 | if (unlikely(!event)) | ||
174 | return -ENOMEM; | ||
175 | |||
176 | fsn_event = &event->fse; | ||
177 | fsnotify_init_event(fsn_event, inode, mask); | ||
178 | event->tgid = get_pid(task_tgid(current)); | ||
179 | if (data_type == FSNOTIFY_EVENT_PATH) { | ||
180 | struct path *path = data; | ||
181 | event->path = *path; | ||
182 | path_get(&event->path); | ||
183 | } else { | ||
184 | event->path.mnt = NULL; | ||
185 | event->path.dentry = NULL; | ||
186 | } | ||
187 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
188 | event->response = 0; | ||
189 | #endif | ||
190 | |||
191 | notify_fsn_event = fsnotify_add_notify_event(group, fsn_event, | ||
192 | fanotify_merge); | ||
193 | if (notify_fsn_event) { | ||
194 | /* Our event wasn't used in the end. Free it. */ | ||
195 | fsnotify_destroy_event(group, fsn_event); | ||
196 | if (IS_ERR(notify_fsn_event)) | ||
197 | return PTR_ERR(notify_fsn_event); | ||
198 | /* We need to ask about a different events after a merge... */ | ||
199 | event = FANOTIFY_E(notify_fsn_event); | ||
200 | fsn_event = notify_fsn_event; | ||
201 | } | ||
202 | |||
203 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
204 | if (fsn_event->mask & FAN_ALL_PERM_EVENTS) | ||
205 | ret = fanotify_get_response_from_access(group, event); | ||
206 | #endif | ||
207 | return ret; | ||
208 | } | ||
209 | |||
220 | static void fanotify_free_group_priv(struct fsnotify_group *group) | 210 | static void fanotify_free_group_priv(struct fsnotify_group *group) |
221 | { | 211 | { |
222 | struct user_struct *user; | 212 | struct user_struct *user; |
@@ -226,10 +216,18 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) | |||
226 | free_uid(user); | 216 | free_uid(user); |
227 | } | 217 | } |
228 | 218 | ||
219 | static void fanotify_free_event(struct fsnotify_event *fsn_event) | ||
220 | { | ||
221 | struct fanotify_event_info *event; | ||
222 | |||
223 | event = FANOTIFY_E(fsn_event); | ||
224 | path_put(&event->path); | ||
225 | put_pid(event->tgid); | ||
226 | kmem_cache_free(fanotify_event_cachep, event); | ||
227 | } | ||
228 | |||
229 | const struct fsnotify_ops fanotify_fsnotify_ops = { | 229 | const struct fsnotify_ops fanotify_fsnotify_ops = { |
230 | .handle_event = fanotify_handle_event, | 230 | .handle_event = fanotify_handle_event, |
231 | .should_send_event = fanotify_should_send_event, | ||
232 | .free_group_priv = fanotify_free_group_priv, | 231 | .free_group_priv = fanotify_free_group_priv, |
233 | .free_event_priv = NULL, | 232 | .free_event = fanotify_free_event, |
234 | .freeing_mark = NULL, | ||
235 | }; | 233 | }; |
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h new file mode 100644 index 000000000000..0e90174a116a --- /dev/null +++ b/fs/notify/fanotify/fanotify.h | |||
@@ -0,0 +1,23 @@ | |||
1 | #include <linux/fsnotify_backend.h> | ||
2 | #include <linux/path.h> | ||
3 | #include <linux/slab.h> | ||
4 | |||
5 | extern struct kmem_cache *fanotify_event_cachep; | ||
6 | |||
7 | struct fanotify_event_info { | ||
8 | struct fsnotify_event fse; | ||
9 | /* | ||
10 | * We hold ref to this path so it may be dereferenced at any point | ||
11 | * during this object's lifetime | ||
12 | */ | ||
13 | struct path path; | ||
14 | struct pid *tgid; | ||
15 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
16 | u32 response; /* userspace answer to question */ | ||
17 | #endif | ||
18 | }; | ||
19 | |||
20 | static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) | ||
21 | { | ||
22 | return container_of(fse, struct fanotify_event_info, fse); | ||
23 | } | ||
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index e44cb6427df3..57d7c083cb4b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -19,6 +19,7 @@ | |||
19 | 19 | ||
20 | #include "../../mount.h" | 20 | #include "../../mount.h" |
21 | #include "../fdinfo.h" | 21 | #include "../fdinfo.h" |
22 | #include "fanotify.h" | ||
22 | 23 | ||
23 | #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 | 24 | #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 |
24 | #define FANOTIFY_DEFAULT_MAX_MARKS 8192 | 25 | #define FANOTIFY_DEFAULT_MAX_MARKS 8192 |
@@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; | |||
28 | 29 | ||
29 | static struct kmem_cache *fanotify_mark_cache __read_mostly; | 30 | static struct kmem_cache *fanotify_mark_cache __read_mostly; |
30 | static struct kmem_cache *fanotify_response_event_cache __read_mostly; | 31 | static struct kmem_cache *fanotify_response_event_cache __read_mostly; |
32 | struct kmem_cache *fanotify_event_cachep __read_mostly; | ||
31 | 33 | ||
32 | struct fanotify_response_event { | 34 | struct fanotify_response_event { |
33 | struct list_head list; | 35 | struct list_head list; |
34 | __s32 fd; | 36 | __s32 fd; |
35 | struct fsnotify_event *event; | 37 | struct fanotify_event_info *event; |
36 | }; | 38 | }; |
37 | 39 | ||
38 | /* | 40 | /* |
@@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
61 | } | 63 | } |
62 | 64 | ||
63 | static int create_fd(struct fsnotify_group *group, | 65 | static int create_fd(struct fsnotify_group *group, |
64 | struct fsnotify_event *event, | 66 | struct fanotify_event_info *event, |
65 | struct file **file) | 67 | struct file **file) |
66 | { | 68 | { |
67 | int client_fd; | 69 | int client_fd; |
68 | struct file *new_file; | 70 | struct file *new_file; |
@@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group, | |||
73 | if (client_fd < 0) | 75 | if (client_fd < 0) |
74 | return client_fd; | 76 | return client_fd; |
75 | 77 | ||
76 | if (event->data_type != FSNOTIFY_EVENT_PATH) { | ||
77 | WARN_ON(1); | ||
78 | put_unused_fd(client_fd); | ||
79 | return -EINVAL; | ||
80 | } | ||
81 | |||
82 | /* | 78 | /* |
83 | * we need a new file handle for the userspace program so it can read even if it was | 79 | * we need a new file handle for the userspace program so it can read even if it was |
84 | * originally opened O_WRONLY. | 80 | * originally opened O_WRONLY. |
@@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group, | |||
109 | } | 105 | } |
110 | 106 | ||
111 | static int fill_event_metadata(struct fsnotify_group *group, | 107 | static int fill_event_metadata(struct fsnotify_group *group, |
112 | struct fanotify_event_metadata *metadata, | 108 | struct fanotify_event_metadata *metadata, |
113 | struct fsnotify_event *event, | 109 | struct fsnotify_event *fsn_event, |
114 | struct file **file) | 110 | struct file **file) |
115 | { | 111 | { |
116 | int ret = 0; | 112 | int ret = 0; |
113 | struct fanotify_event_info *event; | ||
117 | 114 | ||
118 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, | 115 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, |
119 | group, metadata, event); | 116 | group, metadata, fsn_event); |
120 | 117 | ||
121 | *file = NULL; | 118 | *file = NULL; |
119 | event = container_of(fsn_event, struct fanotify_event_info, fse); | ||
122 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 120 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
123 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | 121 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; |
124 | metadata->vers = FANOTIFY_METADATA_VERSION; | 122 | metadata->vers = FANOTIFY_METADATA_VERSION; |
125 | metadata->reserved = 0; | 123 | metadata->reserved = 0; |
126 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; | 124 | metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; |
127 | metadata->pid = pid_vnr(event->tgid); | 125 | metadata->pid = pid_vnr(event->tgid); |
128 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) | 126 | if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) |
129 | metadata->fd = FAN_NOFD; | 127 | metadata->fd = FAN_NOFD; |
130 | else { | 128 | else { |
131 | metadata->fd = create_fd(group, event, file); | 129 | metadata->fd = create_fd(group, event, file); |
@@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
209 | if (!re) | 207 | if (!re) |
210 | return -ENOMEM; | 208 | return -ENOMEM; |
211 | 209 | ||
212 | re->event = event; | 210 | re->event = FANOTIFY_E(event); |
213 | re->fd = fd; | 211 | re->fd = fd; |
214 | 212 | ||
215 | mutex_lock(&group->fanotify_data.access_mutex); | 213 | mutex_lock(&group->fanotify_data.access_mutex); |
@@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
217 | if (atomic_read(&group->fanotify_data.bypass_perm)) { | 215 | if (atomic_read(&group->fanotify_data.bypass_perm)) { |
218 | mutex_unlock(&group->fanotify_data.access_mutex); | 216 | mutex_unlock(&group->fanotify_data.access_mutex); |
219 | kmem_cache_free(fanotify_response_event_cache, re); | 217 | kmem_cache_free(fanotify_response_event_cache, re); |
220 | event->response = FAN_ALLOW; | 218 | FANOTIFY_E(event)->response = FAN_ALLOW; |
221 | return 0; | 219 | return 0; |
222 | } | 220 | } |
223 | 221 | ||
@@ -273,7 +271,7 @@ out_close_fd: | |||
273 | out: | 271 | out: |
274 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 272 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
275 | if (event->mask & FAN_ALL_PERM_EVENTS) { | 273 | if (event->mask & FAN_ALL_PERM_EVENTS) { |
276 | event->response = FAN_DENY; | 274 | FANOTIFY_E(event)->response = FAN_DENY; |
277 | wake_up(&group->fanotify_data.access_waitq); | 275 | wake_up(&group->fanotify_data.access_waitq); |
278 | } | 276 | } |
279 | #endif | 277 | #endif |
@@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, | |||
321 | if (IS_ERR(kevent)) | 319 | if (IS_ERR(kevent)) |
322 | break; | 320 | break; |
323 | ret = copy_event_to_user(group, kevent, buf); | 321 | ret = copy_event_to_user(group, kevent, buf); |
324 | fsnotify_put_event(kevent); | 322 | fsnotify_destroy_event(group, kevent); |
325 | if (ret < 0) | 323 | if (ret < 0) |
326 | break; | 324 | break; |
327 | buf += ret; | 325 | buf += ret; |
@@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
409 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 407 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
410 | { | 408 | { |
411 | struct fsnotify_group *group; | 409 | struct fsnotify_group *group; |
412 | struct fsnotify_event_holder *holder; | 410 | struct fsnotify_event *fsn_event; |
413 | void __user *p; | 411 | void __user *p; |
414 | int ret = -ENOTTY; | 412 | int ret = -ENOTTY; |
415 | size_t send_len = 0; | 413 | size_t send_len = 0; |
@@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar | |||
421 | switch (cmd) { | 419 | switch (cmd) { |
422 | case FIONREAD: | 420 | case FIONREAD: |
423 | mutex_lock(&group->notification_mutex); | 421 | mutex_lock(&group->notification_mutex); |
424 | list_for_each_entry(holder, &group->notification_list, event_list) | 422 | list_for_each_entry(fsn_event, &group->notification_list, list) |
425 | send_len += FAN_EVENT_METADATA_LEN; | 423 | send_len += FAN_EVENT_METADATA_LEN; |
426 | mutex_unlock(&group->notification_mutex); | 424 | mutex_unlock(&group->notification_mutex); |
427 | ret = put_user(send_len, (int __user *) p); | 425 | ret = put_user(send_len, (int __user *) p); |
@@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void) | |||
906 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); | 904 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); |
907 | fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, | 905 | fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, |
908 | SLAB_PANIC); | 906 | SLAB_PANIC); |
907 | fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); | ||
909 | 908 | ||
910 | return 0; | 909 | return 0; |
911 | } | 910 | } |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4bb21d67d9b1..1d4e1ea2f37c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell, | |||
128 | struct fsnotify_mark *vfsmount_mark, | 128 | struct fsnotify_mark *vfsmount_mark, |
129 | __u32 mask, void *data, | 129 | __u32 mask, void *data, |
130 | int data_is, u32 cookie, | 130 | int data_is, u32 cookie, |
131 | const unsigned char *file_name, | 131 | const unsigned char *file_name) |
132 | struct fsnotify_event **event) | ||
133 | { | 132 | { |
134 | struct fsnotify_group *group = NULL; | 133 | struct fsnotify_group *group = NULL; |
135 | __u32 inode_test_mask = 0; | 134 | __u32 inode_test_mask = 0; |
@@ -170,27 +169,17 @@ static int send_to_group(struct inode *to_tell, | |||
170 | 169 | ||
171 | pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" | 170 | pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" |
172 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" | 171 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" |
173 | " data=%p data_is=%d cookie=%d event=%p\n", | 172 | " data=%p data_is=%d cookie=%d\n", |
174 | __func__, group, to_tell, mask, inode_mark, | 173 | __func__, group, to_tell, mask, inode_mark, |
175 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, | 174 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, |
176 | data_is, cookie, *event); | 175 | data_is, cookie); |
177 | 176 | ||
178 | if (!inode_test_mask && !vfsmount_test_mask) | 177 | if (!inode_test_mask && !vfsmount_test_mask) |
179 | return 0; | 178 | return 0; |
180 | 179 | ||
181 | if (group->ops->should_send_event(group, to_tell, inode_mark, | 180 | return group->ops->handle_event(group, to_tell, inode_mark, |
182 | vfsmount_mark, mask, data, | 181 | vfsmount_mark, mask, data, data_is, |
183 | data_is) == false) | 182 | file_name); |
184 | return 0; | ||
185 | |||
186 | if (!*event) { | ||
187 | *event = fsnotify_create_event(to_tell, mask, data, | ||
188 | data_is, file_name, | ||
189 | cookie, GFP_KERNEL); | ||
190 | if (!*event) | ||
191 | return -ENOMEM; | ||
192 | } | ||
193 | return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event); | ||
194 | } | 183 | } |
195 | 184 | ||
196 | /* | 185 | /* |
@@ -205,7 +194,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
205 | struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; | 194 | struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; |
206 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; | 195 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; |
207 | struct fsnotify_group *inode_group, *vfsmount_group; | 196 | struct fsnotify_group *inode_group, *vfsmount_group; |
208 | struct fsnotify_event *event = NULL; | ||
209 | struct mount *mnt; | 197 | struct mount *mnt; |
210 | int idx, ret = 0; | 198 | int idx, ret = 0; |
211 | /* global tests shouldn't care about events on child only the specific event */ | 199 | /* global tests shouldn't care about events on child only the specific event */ |
@@ -258,18 +246,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
258 | 246 | ||
259 | if (inode_group > vfsmount_group) { | 247 | if (inode_group > vfsmount_group) { |
260 | /* handle inode */ | 248 | /* handle inode */ |
261 | ret = send_to_group(to_tell, inode_mark, NULL, mask, data, | 249 | ret = send_to_group(to_tell, inode_mark, NULL, mask, |
262 | data_is, cookie, file_name, &event); | 250 | data, data_is, cookie, file_name); |
263 | /* we didn't use the vfsmount_mark */ | 251 | /* we didn't use the vfsmount_mark */ |
264 | vfsmount_group = NULL; | 252 | vfsmount_group = NULL; |
265 | } else if (vfsmount_group > inode_group) { | 253 | } else if (vfsmount_group > inode_group) { |
266 | ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, | 254 | ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, |
267 | data_is, cookie, file_name, &event); | 255 | data, data_is, cookie, file_name); |
268 | inode_group = NULL; | 256 | inode_group = NULL; |
269 | } else { | 257 | } else { |
270 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, | 258 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, |
271 | mask, data, data_is, cookie, file_name, | 259 | mask, data, data_is, cookie, |
272 | &event); | 260 | file_name); |
273 | } | 261 | } |
274 | 262 | ||
275 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) | 263 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) |
@@ -285,12 +273,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
285 | ret = 0; | 273 | ret = 0; |
286 | out: | 274 | out: |
287 | srcu_read_unlock(&fsnotify_mark_srcu, idx); | 275 | srcu_read_unlock(&fsnotify_mark_srcu, idx); |
288 | /* | ||
289 | * fsnotify_create_event() took a reference so the event can't be cleaned | ||
290 | * up while we are still trying to add it to lists, drop that one. | ||
291 | */ | ||
292 | if (event) | ||
293 | fsnotify_put_event(event); | ||
294 | 276 | ||
295 | return ret; | 277 | return ret; |
296 | } | 278 | } |
diff --git a/fs/notify/group.c b/fs/notify/group.c index bd2625bd88b4..ee674fe2cec7 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) | |||
99 | INIT_LIST_HEAD(&group->marks_list); | 99 | INIT_LIST_HEAD(&group->marks_list); |
100 | 100 | ||
101 | group->ops = ops; | 101 | group->ops = ops; |
102 | fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW); | ||
102 | 103 | ||
103 | return group; | 104 | return group; |
104 | } | 105 | } |
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index b6642e4de4bf..485eef3f4407 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h | |||
@@ -2,11 +2,12 @@ | |||
2 | #include <linux/inotify.h> | 2 | #include <linux/inotify.h> |
3 | #include <linux/slab.h> /* struct kmem_cache */ | 3 | #include <linux/slab.h> /* struct kmem_cache */ |
4 | 4 | ||
5 | extern struct kmem_cache *event_priv_cachep; | 5 | struct inotify_event_info { |
6 | 6 | struct fsnotify_event fse; | |
7 | struct inotify_event_private_data { | ||
8 | struct fsnotify_event_private_data fsnotify_event_priv_data; | ||
9 | int wd; | 7 | int wd; |
8 | u32 sync_cookie; | ||
9 | int name_len; | ||
10 | char name[]; | ||
10 | }; | 11 | }; |
11 | 12 | ||
12 | struct inotify_inode_mark { | 13 | struct inotify_inode_mark { |
@@ -14,8 +15,18 @@ struct inotify_inode_mark { | |||
14 | int wd; | 15 | int wd; |
15 | }; | 16 | }; |
16 | 17 | ||
18 | static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) | ||
19 | { | ||
20 | return container_of(fse, struct inotify_event_info, fse); | ||
21 | } | ||
22 | |||
17 | extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, | 23 | extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, |
18 | struct fsnotify_group *group); | 24 | struct fsnotify_group *group); |
19 | extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); | 25 | extern int inotify_handle_event(struct fsnotify_group *group, |
26 | struct inode *inode, | ||
27 | struct fsnotify_mark *inode_mark, | ||
28 | struct fsnotify_mark *vfsmount_mark, | ||
29 | u32 mask, void *data, int data_type, | ||
30 | const unsigned char *file_name); | ||
20 | 31 | ||
21 | extern const struct fsnotify_ops inotify_fsnotify_ops; | 32 | extern const struct fsnotify_ops inotify_fsnotify_ops; |
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 4216308b81b4..aad1a35e9af1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -34,100 +34,87 @@ | |||
34 | #include "inotify.h" | 34 | #include "inotify.h" |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Check if 2 events contain the same information. We do not compare private data | 37 | * Check if 2 events contain the same information. |
38 | * but at this moment that isn't a problem for any know fsnotify listeners. | ||
39 | */ | 38 | */ |
40 | static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) | 39 | static bool event_compare(struct fsnotify_event *old_fsn, |
40 | struct fsnotify_event *new_fsn) | ||
41 | { | 41 | { |
42 | if ((old->mask == new->mask) && | 42 | struct inotify_event_info *old, *new; |
43 | (old->to_tell == new->to_tell) && | 43 | |
44 | (old->data_type == new->data_type) && | 44 | if (old_fsn->mask & FS_IN_IGNORED) |
45 | (old->name_len == new->name_len)) { | 45 | return false; |
46 | switch (old->data_type) { | 46 | old = INOTIFY_E(old_fsn); |
47 | case (FSNOTIFY_EVENT_INODE): | 47 | new = INOTIFY_E(new_fsn); |
48 | /* remember, after old was put on the wait_q we aren't | 48 | if ((old_fsn->mask == new_fsn->mask) && |
49 | * allowed to look at the inode any more, only thing | 49 | (old_fsn->inode == new_fsn->inode) && |
50 | * left to check was if the file_name is the same */ | 50 | (old->name_len == new->name_len) && |
51 | if (!old->name_len || | 51 | (!old->name_len || !strcmp(old->name, new->name))) |
52 | !strcmp(old->file_name, new->file_name)) | 52 | return true; |
53 | return true; | ||
54 | break; | ||
55 | case (FSNOTIFY_EVENT_PATH): | ||
56 | if ((old->path.mnt == new->path.mnt) && | ||
57 | (old->path.dentry == new->path.dentry)) | ||
58 | return true; | ||
59 | break; | ||
60 | case (FSNOTIFY_EVENT_NONE): | ||
61 | if (old->mask & FS_Q_OVERFLOW) | ||
62 | return true; | ||
63 | else if (old->mask & FS_IN_IGNORED) | ||
64 | return false; | ||
65 | return true; | ||
66 | }; | ||
67 | } | ||
68 | return false; | 53 | return false; |
69 | } | 54 | } |
70 | 55 | ||
71 | static struct fsnotify_event *inotify_merge(struct list_head *list, | 56 | static struct fsnotify_event *inotify_merge(struct list_head *list, |
72 | struct fsnotify_event *event) | 57 | struct fsnotify_event *event) |
73 | { | 58 | { |
74 | struct fsnotify_event_holder *last_holder; | ||
75 | struct fsnotify_event *last_event; | 59 | struct fsnotify_event *last_event; |
76 | 60 | ||
77 | /* and the list better be locked by something too */ | 61 | last_event = list_entry(list->prev, struct fsnotify_event, list); |
78 | spin_lock(&event->lock); | 62 | if (!event_compare(last_event, event)) |
79 | 63 | return NULL; | |
80 | last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); | ||
81 | last_event = last_holder->event; | ||
82 | if (event_compare(last_event, event)) | ||
83 | fsnotify_get_event(last_event); | ||
84 | else | ||
85 | last_event = NULL; | ||
86 | |||
87 | spin_unlock(&event->lock); | ||
88 | |||
89 | return last_event; | 64 | return last_event; |
90 | } | 65 | } |
91 | 66 | ||
92 | static int inotify_handle_event(struct fsnotify_group *group, | 67 | int inotify_handle_event(struct fsnotify_group *group, |
93 | struct fsnotify_mark *inode_mark, | 68 | struct inode *inode, |
94 | struct fsnotify_mark *vfsmount_mark, | 69 | struct fsnotify_mark *inode_mark, |
95 | struct fsnotify_event *event) | 70 | struct fsnotify_mark *vfsmount_mark, |
71 | u32 mask, void *data, int data_type, | ||
72 | const unsigned char *file_name) | ||
96 | { | 73 | { |
97 | struct inotify_inode_mark *i_mark; | 74 | struct inotify_inode_mark *i_mark; |
98 | struct inode *to_tell; | 75 | struct inotify_event_info *event; |
99 | struct inotify_event_private_data *event_priv; | ||
100 | struct fsnotify_event_private_data *fsn_event_priv; | ||
101 | struct fsnotify_event *added_event; | 76 | struct fsnotify_event *added_event; |
102 | int wd, ret = 0; | 77 | struct fsnotify_event *fsn_event; |
78 | int ret = 0; | ||
79 | int len = 0; | ||
80 | int alloc_len = sizeof(struct inotify_event_info); | ||
103 | 81 | ||
104 | BUG_ON(vfsmount_mark); | 82 | BUG_ON(vfsmount_mark); |
105 | 83 | ||
106 | pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group, | 84 | if ((inode_mark->mask & FS_EXCL_UNLINK) && |
107 | event, event->to_tell, event->mask); | 85 | (data_type == FSNOTIFY_EVENT_PATH)) { |
86 | struct path *path = data; | ||
108 | 87 | ||
109 | to_tell = event->to_tell; | 88 | if (d_unlinked(path->dentry)) |
89 | return 0; | ||
90 | } | ||
91 | if (file_name) { | ||
92 | len = strlen(file_name); | ||
93 | alloc_len += len + 1; | ||
94 | } | ||
95 | |||
96 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, | ||
97 | mask); | ||
110 | 98 | ||
111 | i_mark = container_of(inode_mark, struct inotify_inode_mark, | 99 | i_mark = container_of(inode_mark, struct inotify_inode_mark, |
112 | fsn_mark); | 100 | fsn_mark); |
113 | wd = i_mark->wd; | ||
114 | 101 | ||
115 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); | 102 | event = kmalloc(alloc_len, GFP_KERNEL); |
116 | if (unlikely(!event_priv)) | 103 | if (unlikely(!event)) |
117 | return -ENOMEM; | 104 | return -ENOMEM; |
118 | 105 | ||
119 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; | 106 | fsn_event = &event->fse; |
120 | 107 | fsnotify_init_event(fsn_event, inode, mask); | |
121 | fsnotify_get_group(group); | 108 | event->wd = i_mark->wd; |
122 | fsn_event_priv->group = group; | 109 | event->name_len = len; |
123 | event_priv->wd = wd; | 110 | if (len) |
111 | strcpy(event->name, file_name); | ||
124 | 112 | ||
125 | added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge); | 113 | added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge); |
126 | if (added_event) { | 114 | if (added_event) { |
127 | inotify_free_event_priv(fsn_event_priv); | 115 | /* Our event wasn't used in the end. Free it. */ |
128 | if (!IS_ERR(added_event)) | 116 | fsnotify_destroy_event(group, fsn_event); |
129 | fsnotify_put_event(added_event); | 117 | if (IS_ERR(added_event)) |
130 | else | ||
131 | ret = PTR_ERR(added_event); | 118 | ret = PTR_ERR(added_event); |
132 | } | 119 | } |
133 | 120 | ||
@@ -142,22 +129,6 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify | |||
142 | inotify_ignored_and_remove_idr(fsn_mark, group); | 129 | inotify_ignored_and_remove_idr(fsn_mark, group); |
143 | } | 130 | } |
144 | 131 | ||
145 | static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, | ||
146 | struct fsnotify_mark *inode_mark, | ||
147 | struct fsnotify_mark *vfsmount_mark, | ||
148 | __u32 mask, void *data, int data_type) | ||
149 | { | ||
150 | if ((inode_mark->mask & FS_EXCL_UNLINK) && | ||
151 | (data_type == FSNOTIFY_EVENT_PATH)) { | ||
152 | struct path *path = data; | ||
153 | |||
154 | if (d_unlinked(path->dentry)) | ||
155 | return false; | ||
156 | } | ||
157 | |||
158 | return true; | ||
159 | } | ||
160 | |||
161 | /* | 132 | /* |
162 | * This is NEVER supposed to be called. Inotify marks should either have been | 133 | * This is NEVER supposed to be called. Inotify marks should either have been |
163 | * removed from the idr when the watch was removed or in the | 134 | * removed from the idr when the watch was removed or in the |
@@ -202,22 +173,14 @@ static void inotify_free_group_priv(struct fsnotify_group *group) | |||
202 | free_uid(group->inotify_data.user); | 173 | free_uid(group->inotify_data.user); |
203 | } | 174 | } |
204 | 175 | ||
205 | void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) | 176 | static void inotify_free_event(struct fsnotify_event *fsn_event) |
206 | { | 177 | { |
207 | struct inotify_event_private_data *event_priv; | 178 | kfree(INOTIFY_E(fsn_event)); |
208 | |||
209 | |||
210 | event_priv = container_of(fsn_event_priv, struct inotify_event_private_data, | ||
211 | fsnotify_event_priv_data); | ||
212 | |||
213 | fsnotify_put_group(fsn_event_priv->group); | ||
214 | kmem_cache_free(event_priv_cachep, event_priv); | ||
215 | } | 179 | } |
216 | 180 | ||
217 | const struct fsnotify_ops inotify_fsnotify_ops = { | 181 | const struct fsnotify_ops inotify_fsnotify_ops = { |
218 | .handle_event = inotify_handle_event, | 182 | .handle_event = inotify_handle_event, |
219 | .should_send_event = inotify_should_send_event, | ||
220 | .free_group_priv = inotify_free_group_priv, | 183 | .free_group_priv = inotify_free_group_priv, |
221 | .free_event_priv = inotify_free_event_priv, | 184 | .free_event = inotify_free_event, |
222 | .freeing_mark = inotify_freeing_mark, | 185 | .freeing_mark = inotify_freeing_mark, |
223 | }; | 186 | }; |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 60f954a891ab..497395c8274b 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly; | |||
50 | static int inotify_max_user_watches __read_mostly; | 50 | static int inotify_max_user_watches __read_mostly; |
51 | 51 | ||
52 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; | 52 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; |
53 | struct kmem_cache *event_priv_cachep __read_mostly; | ||
54 | 53 | ||
55 | #ifdef CONFIG_SYSCTL | 54 | #ifdef CONFIG_SYSCTL |
56 | 55 | ||
@@ -124,6 +123,16 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait) | |||
124 | return ret; | 123 | return ret; |
125 | } | 124 | } |
126 | 125 | ||
126 | static int round_event_name_len(struct fsnotify_event *fsn_event) | ||
127 | { | ||
128 | struct inotify_event_info *event; | ||
129 | |||
130 | event = INOTIFY_E(fsn_event); | ||
131 | if (!event->name_len) | ||
132 | return 0; | ||
133 | return roundup(event->name_len + 1, sizeof(struct inotify_event)); | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * Get an inotify_kernel_event if one exists and is small | 137 | * Get an inotify_kernel_event if one exists and is small |
129 | * enough to fit in "count". Return an error pointer if | 138 | * enough to fit in "count". Return an error pointer if |
@@ -144,9 +153,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
144 | 153 | ||
145 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 154 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
146 | 155 | ||
147 | if (event->name_len) | 156 | event_size += round_event_name_len(event); |
148 | event_size += roundup(event->name_len + 1, event_size); | ||
149 | |||
150 | if (event_size > count) | 157 | if (event_size > count) |
151 | return ERR_PTR(-EINVAL); | 158 | return ERR_PTR(-EINVAL); |
152 | 159 | ||
@@ -164,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
164 | * buffer we had in "get_one_event()" above. | 171 | * buffer we had in "get_one_event()" above. |
165 | */ | 172 | */ |
166 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | 173 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
167 | struct fsnotify_event *event, | 174 | struct fsnotify_event *fsn_event, |
168 | char __user *buf) | 175 | char __user *buf) |
169 | { | 176 | { |
170 | struct inotify_event inotify_event; | 177 | struct inotify_event inotify_event; |
171 | struct fsnotify_event_private_data *fsn_priv; | 178 | struct inotify_event_info *event; |
172 | struct inotify_event_private_data *priv; | ||
173 | size_t event_size = sizeof(struct inotify_event); | 179 | size_t event_size = sizeof(struct inotify_event); |
174 | size_t name_len = 0; | 180 | size_t name_len; |
175 | 181 | size_t pad_name_len; | |
176 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
177 | 182 | ||
178 | /* we get the inotify watch descriptor from the event private data */ | 183 | pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); |
179 | spin_lock(&event->lock); | ||
180 | fsn_priv = fsnotify_remove_priv_from_event(group, event); | ||
181 | spin_unlock(&event->lock); | ||
182 | |||
183 | if (!fsn_priv) | ||
184 | inotify_event.wd = -1; | ||
185 | else { | ||
186 | priv = container_of(fsn_priv, struct inotify_event_private_data, | ||
187 | fsnotify_event_priv_data); | ||
188 | inotify_event.wd = priv->wd; | ||
189 | inotify_free_event_priv(fsn_priv); | ||
190 | } | ||
191 | 184 | ||
185 | event = INOTIFY_E(fsn_event); | ||
186 | name_len = event->name_len; | ||
192 | /* | 187 | /* |
193 | * round up event->name_len so it is a multiple of event_size | 188 | * round up name length so it is a multiple of event_size |
194 | * plus an extra byte for the terminating '\0'. | 189 | * plus an extra byte for the terminating '\0'. |
195 | */ | 190 | */ |
196 | if (event->name_len) | 191 | pad_name_len = round_event_name_len(fsn_event); |
197 | name_len = roundup(event->name_len + 1, event_size); | 192 | inotify_event.len = pad_name_len; |
198 | inotify_event.len = name_len; | 193 | inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); |
199 | 194 | inotify_event.wd = event->wd; | |
200 | inotify_event.mask = inotify_mask_to_arg(event->mask); | ||
201 | inotify_event.cookie = event->sync_cookie; | 195 | inotify_event.cookie = event->sync_cookie; |
202 | 196 | ||
203 | /* send the main event */ | 197 | /* send the main event */ |
@@ -209,20 +203,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
209 | /* | 203 | /* |
210 | * fsnotify only stores the pathname, so here we have to send the pathname | 204 | * fsnotify only stores the pathname, so here we have to send the pathname |
211 | * and then pad that pathname out to a multiple of sizeof(inotify_event) | 205 | * and then pad that pathname out to a multiple of sizeof(inotify_event) |
212 | * with zeros. I get my zeros from the nul_inotify_event. | 206 | * with zeros. |
213 | */ | 207 | */ |
214 | if (name_len) { | 208 | if (pad_name_len) { |
215 | unsigned int len_to_zero = name_len - event->name_len; | ||
216 | /* copy the path name */ | 209 | /* copy the path name */ |
217 | if (copy_to_user(buf, event->file_name, event->name_len)) | 210 | if (copy_to_user(buf, event->name, name_len)) |
218 | return -EFAULT; | 211 | return -EFAULT; |
219 | buf += event->name_len; | 212 | buf += name_len; |
220 | 213 | ||
221 | /* fill userspace with 0's */ | 214 | /* fill userspace with 0's */ |
222 | if (clear_user(buf, len_to_zero)) | 215 | if (clear_user(buf, pad_name_len - name_len)) |
223 | return -EFAULT; | 216 | return -EFAULT; |
224 | buf += len_to_zero; | 217 | event_size += pad_name_len; |
225 | event_size += name_len; | ||
226 | } | 218 | } |
227 | 219 | ||
228 | return event_size; | 220 | return event_size; |
@@ -254,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
254 | if (IS_ERR(kevent)) | 246 | if (IS_ERR(kevent)) |
255 | break; | 247 | break; |
256 | ret = copy_event_to_user(group, kevent, buf); | 248 | ret = copy_event_to_user(group, kevent, buf); |
257 | fsnotify_put_event(kevent); | 249 | fsnotify_destroy_event(group, kevent); |
258 | if (ret < 0) | 250 | if (ret < 0) |
259 | break; | 251 | break; |
260 | buf += ret; | 252 | buf += ret; |
@@ -297,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
297 | unsigned long arg) | 289 | unsigned long arg) |
298 | { | 290 | { |
299 | struct fsnotify_group *group; | 291 | struct fsnotify_group *group; |
300 | struct fsnotify_event_holder *holder; | 292 | struct fsnotify_event *fsn_event; |
301 | struct fsnotify_event *event; | ||
302 | void __user *p; | 293 | void __user *p; |
303 | int ret = -ENOTTY; | 294 | int ret = -ENOTTY; |
304 | size_t send_len = 0; | 295 | size_t send_len = 0; |
@@ -311,12 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
311 | switch (cmd) { | 302 | switch (cmd) { |
312 | case FIONREAD: | 303 | case FIONREAD: |
313 | mutex_lock(&group->notification_mutex); | 304 | mutex_lock(&group->notification_mutex); |
314 | list_for_each_entry(holder, &group->notification_list, event_list) { | 305 | list_for_each_entry(fsn_event, &group->notification_list, |
315 | event = holder->event; | 306 | list) { |
316 | send_len += sizeof(struct inotify_event); | 307 | send_len += sizeof(struct inotify_event); |
317 | if (event->name_len) | 308 | send_len += round_event_name_len(fsn_event); |
318 | send_len += roundup(event->name_len + 1, | ||
319 | sizeof(struct inotify_event)); | ||
320 | } | 309 | } |
321 | mutex_unlock(&group->notification_mutex); | 310 | mutex_unlock(&group->notification_mutex); |
322 | ret = put_user(send_len, (int __user *) p); | 311 | ret = put_user(send_len, (int __user *) p); |
@@ -503,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, | |||
503 | struct fsnotify_group *group) | 492 | struct fsnotify_group *group) |
504 | { | 493 | { |
505 | struct inotify_inode_mark *i_mark; | 494 | struct inotify_inode_mark *i_mark; |
506 | struct fsnotify_event *ignored_event, *notify_event; | ||
507 | struct inotify_event_private_data *event_priv; | ||
508 | struct fsnotify_event_private_data *fsn_event_priv; | ||
509 | int ret; | ||
510 | |||
511 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); | ||
512 | |||
513 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, | ||
514 | FSNOTIFY_EVENT_NONE, NULL, 0, | ||
515 | GFP_NOFS); | ||
516 | if (!ignored_event) | ||
517 | goto skip_send_ignore; | ||
518 | |||
519 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); | ||
520 | if (unlikely(!event_priv)) | ||
521 | goto skip_send_ignore; | ||
522 | |||
523 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; | ||
524 | |||
525 | fsnotify_get_group(group); | ||
526 | fsn_event_priv->group = group; | ||
527 | event_priv->wd = i_mark->wd; | ||
528 | |||
529 | notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL); | ||
530 | if (notify_event) { | ||
531 | if (IS_ERR(notify_event)) | ||
532 | ret = PTR_ERR(notify_event); | ||
533 | else | ||
534 | fsnotify_put_event(notify_event); | ||
535 | inotify_free_event_priv(fsn_event_priv); | ||
536 | } | ||
537 | 495 | ||
538 | skip_send_ignore: | 496 | /* Queue ignore event for the watch */ |
539 | /* matches the reference taken when the event was created */ | 497 | inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, |
540 | if (ignored_event) | 498 | NULL, FSNOTIFY_EVENT_NONE, NULL); |
541 | fsnotify_put_event(ignored_event); | ||
542 | 499 | ||
500 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); | ||
543 | /* remove this mark from the idr */ | 501 | /* remove this mark from the idr */ |
544 | inotify_remove_from_idr(group, i_mark); | 502 | inotify_remove_from_idr(group, i_mark); |
545 | 503 | ||
@@ -836,7 +794,6 @@ static int __init inotify_user_setup(void) | |||
836 | BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); | 794 | BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); |
837 | 795 | ||
838 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); | 796 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); |
839 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); | ||
840 | 797 | ||
841 | inotify_max_queued_events = 16384; | 798 | inotify_max_queued_events = 16384; |
842 | inotify_max_user_instances = 128; | 799 | inotify_max_user_instances = 128; |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 7b51b05f160c..952237b8e2d2 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -48,15 +48,6 @@ | |||
48 | #include <linux/fsnotify_backend.h> | 48 | #include <linux/fsnotify_backend.h> |
49 | #include "fsnotify.h" | 49 | #include "fsnotify.h" |
50 | 50 | ||
51 | static struct kmem_cache *fsnotify_event_cachep; | ||
52 | static struct kmem_cache *fsnotify_event_holder_cachep; | ||
53 | /* | ||
54 | * This is a magic event we send when the q is too full. Since it doesn't | ||
55 | * hold real event information we just keep one system wide and use it any time | ||
56 | * it is needed. It's refcnt is set 1 at kernel init time and will never | ||
57 | * get set to 0 so it will never get 'freed' | ||
58 | */ | ||
59 | static struct fsnotify_event *q_overflow_event; | ||
60 | static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); | 51 | static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); |
61 | 52 | ||
62 | /** | 53 | /** |
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) | |||
76 | return list_empty(&group->notification_list) ? true : false; | 67 | return list_empty(&group->notification_list) ? true : false; |
77 | } | 68 | } |
78 | 69 | ||
79 | void fsnotify_get_event(struct fsnotify_event *event) | 70 | void fsnotify_destroy_event(struct fsnotify_group *group, |
71 | struct fsnotify_event *event) | ||
80 | { | 72 | { |
81 | atomic_inc(&event->refcnt); | 73 | /* Overflow events are per-group and we don't want to free them */ |
82 | } | 74 | if (!event || event->mask == FS_Q_OVERFLOW) |
83 | |||
84 | void fsnotify_put_event(struct fsnotify_event *event) | ||
85 | { | ||
86 | if (!event) | ||
87 | return; | 75 | return; |
88 | 76 | ||
89 | if (atomic_dec_and_test(&event->refcnt)) { | 77 | group->ops->free_event(event); |
90 | pr_debug("%s: event=%p\n", __func__, event); | ||
91 | |||
92 | if (event->data_type == FSNOTIFY_EVENT_PATH) | ||
93 | path_put(&event->path); | ||
94 | |||
95 | BUG_ON(!list_empty(&event->private_data_list)); | ||
96 | |||
97 | kfree(event->file_name); | ||
98 | put_pid(event->tgid); | ||
99 | kmem_cache_free(fsnotify_event_cachep, event); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | struct fsnotify_event_holder *fsnotify_alloc_event_holder(void) | ||
104 | { | ||
105 | return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL); | ||
106 | } | ||
107 | |||
108 | void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) | ||
109 | { | ||
110 | if (holder) | ||
111 | kmem_cache_free(fsnotify_event_holder_cachep, holder); | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Find the private data that the group previously attached to this event when | ||
116 | * the group added the event to the notification queue (fsnotify_add_notify_event) | ||
117 | */ | ||
118 | struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event) | ||
119 | { | ||
120 | struct fsnotify_event_private_data *lpriv; | ||
121 | struct fsnotify_event_private_data *priv = NULL; | ||
122 | |||
123 | assert_spin_locked(&event->lock); | ||
124 | |||
125 | list_for_each_entry(lpriv, &event->private_data_list, event_list) { | ||
126 | if (lpriv->group == group) { | ||
127 | priv = lpriv; | ||
128 | list_del(&priv->event_list); | ||
129 | break; | ||
130 | } | ||
131 | } | ||
132 | return priv; | ||
133 | } | 78 | } |
134 | 79 | ||
135 | /* | 80 | /* |
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot | |||
137 | * event off the queue to deal with. If the event is successfully added to the | 82 | * event off the queue to deal with. If the event is successfully added to the |
138 | * group's notification queue, a reference is taken on event. | 83 | * group's notification queue, a reference is taken on event. |
139 | */ | 84 | */ |
140 | struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, | 85 | struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, |
141 | struct fsnotify_event_private_data *priv, | 86 | struct fsnotify_event *event, |
142 | struct fsnotify_event *(*merge)(struct list_head *, | 87 | struct fsnotify_event *(*merge)(struct list_head *, |
143 | struct fsnotify_event *)) | 88 | struct fsnotify_event *)) |
144 | { | 89 | { |
145 | struct fsnotify_event *return_event = NULL; | 90 | struct fsnotify_event *return_event = NULL; |
146 | struct fsnotify_event_holder *holder = NULL; | ||
147 | struct list_head *list = &group->notification_list; | 91 | struct list_head *list = &group->notification_list; |
148 | 92 | ||
149 | pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv); | 93 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
150 | |||
151 | /* | ||
152 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | ||
153 | * Check if we expect to be able to use that holder. If not alloc a new | ||
154 | * holder. | ||
155 | * For the overflow event it's possible that something will use the in | ||
156 | * event holder before we get the lock so we may need to jump back and | ||
157 | * alloc a new holder, this can't happen for most events... | ||
158 | */ | ||
159 | if (!list_empty(&event->holder.event_list)) { | ||
160 | alloc_holder: | ||
161 | holder = fsnotify_alloc_event_holder(); | ||
162 | if (!holder) | ||
163 | return ERR_PTR(-ENOMEM); | ||
164 | } | ||
165 | 94 | ||
166 | mutex_lock(&group->notification_mutex); | 95 | mutex_lock(&group->notification_mutex); |
167 | 96 | ||
168 | if (group->q_len >= group->max_events) { | 97 | if (group->q_len >= group->max_events) { |
169 | event = q_overflow_event; | 98 | /* Queue overflow event only if it isn't already queued */ |
170 | 99 | if (list_empty(&group->overflow_event.list)) | |
171 | /* | 100 | event = &group->overflow_event; |
172 | * we need to return the overflow event | ||
173 | * which means we need a ref | ||
174 | */ | ||
175 | fsnotify_get_event(event); | ||
176 | return_event = event; | 101 | return_event = event; |
177 | |||
178 | /* sorry, no private data on the overflow event */ | ||
179 | priv = NULL; | ||
180 | } | 102 | } |
181 | 103 | ||
182 | if (!list_empty(list) && merge) { | 104 | if (!list_empty(list) && merge) { |
183 | struct fsnotify_event *tmp; | 105 | return_event = merge(list, event); |
184 | |||
185 | tmp = merge(list, event); | ||
186 | if (tmp) { | ||
187 | mutex_unlock(&group->notification_mutex); | ||
188 | |||
189 | if (return_event) | ||
190 | fsnotify_put_event(return_event); | ||
191 | if (holder != &event->holder) | ||
192 | fsnotify_destroy_event_holder(holder); | ||
193 | return tmp; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | spin_lock(&event->lock); | ||
198 | |||
199 | if (list_empty(&event->holder.event_list)) { | ||
200 | if (unlikely(holder)) | ||
201 | fsnotify_destroy_event_holder(holder); | ||
202 | holder = &event->holder; | ||
203 | } else if (unlikely(!holder)) { | ||
204 | /* between the time we checked above and got the lock the in | ||
205 | * event holder was used, go back and get a new one */ | ||
206 | spin_unlock(&event->lock); | ||
207 | mutex_unlock(&group->notification_mutex); | ||
208 | |||
209 | if (return_event) { | 106 | if (return_event) { |
210 | fsnotify_put_event(return_event); | 107 | mutex_unlock(&group->notification_mutex); |
211 | return_event = NULL; | 108 | return return_event; |
212 | } | 109 | } |
213 | |||
214 | goto alloc_holder; | ||
215 | } | 110 | } |
216 | 111 | ||
217 | group->q_len++; | 112 | group->q_len++; |
218 | holder->event = event; | 113 | list_add_tail(&event->list, list); |
219 | |||
220 | fsnotify_get_event(event); | ||
221 | list_add_tail(&holder->event_list, list); | ||
222 | if (priv) | ||
223 | list_add_tail(&priv->event_list, &event->private_data_list); | ||
224 | spin_unlock(&event->lock); | ||
225 | mutex_unlock(&group->notification_mutex); | 114 | mutex_unlock(&group->notification_mutex); |
226 | 115 | ||
227 | wake_up(&group->notification_waitq); | 116 | wake_up(&group->notification_waitq); |
@@ -230,32 +119,20 @@ alloc_holder: | |||
230 | } | 119 | } |
231 | 120 | ||
232 | /* | 121 | /* |
233 | * Remove and return the first event from the notification list. There is a | 122 | * Remove and return the first event from the notification list. It is the |
234 | * reference held on this event since it was on the list. It is the responsibility | 123 | * responsibility of the caller to destroy the obtained event |
235 | * of the caller to drop this reference. | ||
236 | */ | 124 | */ |
237 | struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) | 125 | struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) |
238 | { | 126 | { |
239 | struct fsnotify_event *event; | 127 | struct fsnotify_event *event; |
240 | struct fsnotify_event_holder *holder; | ||
241 | 128 | ||
242 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | 129 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); |
243 | 130 | ||
244 | pr_debug("%s: group=%p\n", __func__, group); | 131 | pr_debug("%s: group=%p\n", __func__, group); |
245 | 132 | ||
246 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); | 133 | event = list_first_entry(&group->notification_list, |
247 | 134 | struct fsnotify_event, list); | |
248 | event = holder->event; | 135 | list_del(&event->list); |
249 | |||
250 | spin_lock(&event->lock); | ||
251 | holder->event = NULL; | ||
252 | list_del_init(&holder->event_list); | ||
253 | spin_unlock(&event->lock); | ||
254 | |||
255 | /* event == holder means we are referenced through the in event holder */ | ||
256 | if (holder != &event->holder) | ||
257 | fsnotify_destroy_event_holder(holder); | ||
258 | |||
259 | group->q_len--; | 136 | group->q_len--; |
260 | 137 | ||
261 | return event; | 138 | return event; |
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group | |||
266 | */ | 143 | */ |
267 | struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) | 144 | struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) |
268 | { | 145 | { |
269 | struct fsnotify_event *event; | ||
270 | struct fsnotify_event_holder *holder; | ||
271 | |||
272 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | 146 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); |
273 | 147 | ||
274 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); | 148 | return list_first_entry(&group->notification_list, |
275 | event = holder->event; | 149 | struct fsnotify_event, list); |
276 | |||
277 | return event; | ||
278 | } | 150 | } |
279 | 151 | ||
280 | /* | 152 | /* |
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) | |||
284 | void fsnotify_flush_notify(struct fsnotify_group *group) | 156 | void fsnotify_flush_notify(struct fsnotify_group *group) |
285 | { | 157 | { |
286 | struct fsnotify_event *event; | 158 | struct fsnotify_event *event; |
287 | struct fsnotify_event_private_data *priv; | ||
288 | 159 | ||
289 | mutex_lock(&group->notification_mutex); | 160 | mutex_lock(&group->notification_mutex); |
290 | while (!fsnotify_notify_queue_is_empty(group)) { | 161 | while (!fsnotify_notify_queue_is_empty(group)) { |
291 | event = fsnotify_remove_notify_event(group); | 162 | event = fsnotify_remove_notify_event(group); |
292 | /* if they don't implement free_event_priv they better not have attached any */ | 163 | fsnotify_destroy_event(group, event); |
293 | if (group->ops->free_event_priv) { | ||
294 | spin_lock(&event->lock); | ||
295 | priv = fsnotify_remove_priv_from_event(group, event); | ||
296 | spin_unlock(&event->lock); | ||
297 | if (priv) | ||
298 | group->ops->free_event_priv(priv); | ||
299 | } | ||
300 | fsnotify_put_event(event); /* matches fsnotify_add_notify_event */ | ||
301 | } | 164 | } |
302 | mutex_unlock(&group->notification_mutex); | 165 | mutex_unlock(&group->notification_mutex); |
303 | } | 166 | } |
304 | 167 | ||
305 | static void initialize_event(struct fsnotify_event *event) | ||
306 | { | ||
307 | INIT_LIST_HEAD(&event->holder.event_list); | ||
308 | atomic_set(&event->refcnt, 1); | ||
309 | |||
310 | spin_lock_init(&event->lock); | ||
311 | |||
312 | INIT_LIST_HEAD(&event->private_data_list); | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Caller damn well better be holding whatever mutex is protecting the | ||
317 | * old_holder->event_list and the new_event must be a clean event which | ||
318 | * cannot be found anywhere else in the kernel. | ||
319 | */ | ||
320 | int fsnotify_replace_event(struct fsnotify_event_holder *old_holder, | ||
321 | struct fsnotify_event *new_event) | ||
322 | { | ||
323 | struct fsnotify_event *old_event = old_holder->event; | ||
324 | struct fsnotify_event_holder *new_holder = &new_event->holder; | ||
325 | |||
326 | enum event_spinlock_class { | ||
327 | SPINLOCK_OLD, | ||
328 | SPINLOCK_NEW, | ||
329 | }; | ||
330 | |||
331 | pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event); | ||
332 | |||
333 | /* | ||
334 | * if the new_event's embedded holder is in use someone | ||
335 | * screwed up and didn't give us a clean new event. | ||
336 | */ | ||
337 | BUG_ON(!list_empty(&new_holder->event_list)); | ||
338 | |||
339 | spin_lock_nested(&old_event->lock, SPINLOCK_OLD); | ||
340 | spin_lock_nested(&new_event->lock, SPINLOCK_NEW); | ||
341 | |||
342 | new_holder->event = new_event; | ||
343 | list_replace_init(&old_holder->event_list, &new_holder->event_list); | ||
344 | |||
345 | spin_unlock(&new_event->lock); | ||
346 | spin_unlock(&old_event->lock); | ||
347 | |||
348 | /* event == holder means we are referenced through the in event holder */ | ||
349 | if (old_holder != &old_event->holder) | ||
350 | fsnotify_destroy_event_holder(old_holder); | ||
351 | |||
352 | fsnotify_get_event(new_event); /* on the list take reference */ | ||
353 | fsnotify_put_event(old_event); /* off the list, drop reference */ | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) | ||
359 | { | ||
360 | struct fsnotify_event *event; | ||
361 | |||
362 | event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); | ||
363 | if (!event) | ||
364 | return NULL; | ||
365 | |||
366 | pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event); | ||
367 | |||
368 | memcpy(event, old_event, sizeof(*event)); | ||
369 | initialize_event(event); | ||
370 | |||
371 | if (event->name_len) { | ||
372 | event->file_name = kstrdup(old_event->file_name, GFP_KERNEL); | ||
373 | if (!event->file_name) { | ||
374 | kmem_cache_free(fsnotify_event_cachep, event); | ||
375 | return NULL; | ||
376 | } | ||
377 | } | ||
378 | event->tgid = get_pid(old_event->tgid); | ||
379 | if (event->data_type == FSNOTIFY_EVENT_PATH) | ||
380 | path_get(&event->path); | ||
381 | |||
382 | return event; | ||
383 | } | ||
384 | |||
385 | /* | 168 | /* |
386 | * fsnotify_create_event - Allocate a new event which will be sent to each | 169 | * fsnotify_create_event - Allocate a new event which will be sent to each |
387 | * group's handle_event function if the group was interested in this | 170 | * group's handle_event function if the group was interested in this |
388 | * particular event. | 171 | * particular event. |
389 | * | 172 | * |
390 | * @to_tell the inode which is supposed to receive the event (sometimes a | 173 | * @inode the inode which is supposed to receive the event (sometimes a |
391 | * parent of the inode to which the event happened. | 174 | * parent of the inode to which the event happened. |
392 | * @mask what actually happened. | 175 | * @mask what actually happened. |
393 | * @data pointer to the object which was actually affected | 176 | * @data pointer to the object which was actually affected |
394 | * @data_type flag indication if the data is a file, path, inode, nothing... | 177 | * @data_type flag indication if the data is a file, path, inode, nothing... |
395 | * @name the filename, if available | 178 | * @name the filename, if available |
396 | */ | 179 | */ |
397 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, | 180 | void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, |
398 | int data_type, const unsigned char *name, | 181 | u32 mask) |
399 | u32 cookie, gfp_t gfp) | ||
400 | { | 182 | { |
401 | struct fsnotify_event *event; | 183 | INIT_LIST_HEAD(&event->list); |
402 | 184 | event->inode = inode; | |
403 | event = kmem_cache_zalloc(fsnotify_event_cachep, gfp); | ||
404 | if (!event) | ||
405 | return NULL; | ||
406 | |||
407 | pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n", | ||
408 | __func__, event, to_tell, mask, data, data_type); | ||
409 | |||
410 | initialize_event(event); | ||
411 | |||
412 | if (name) { | ||
413 | event->file_name = kstrdup(name, gfp); | ||
414 | if (!event->file_name) { | ||
415 | kmem_cache_free(fsnotify_event_cachep, event); | ||
416 | return NULL; | ||
417 | } | ||
418 | event->name_len = strlen(event->file_name); | ||
419 | } | ||
420 | |||
421 | event->tgid = get_pid(task_tgid(current)); | ||
422 | event->sync_cookie = cookie; | ||
423 | event->to_tell = to_tell; | ||
424 | event->data_type = data_type; | ||
425 | |||
426 | switch (data_type) { | ||
427 | case FSNOTIFY_EVENT_PATH: { | ||
428 | struct path *path = data; | ||
429 | event->path.dentry = path->dentry; | ||
430 | event->path.mnt = path->mnt; | ||
431 | path_get(&event->path); | ||
432 | break; | ||
433 | } | ||
434 | case FSNOTIFY_EVENT_INODE: | ||
435 | event->inode = data; | ||
436 | break; | ||
437 | case FSNOTIFY_EVENT_NONE: | ||
438 | event->inode = NULL; | ||
439 | event->path.dentry = NULL; | ||
440 | event->path.mnt = NULL; | ||
441 | break; | ||
442 | default: | ||
443 | BUG(); | ||
444 | } | ||
445 | |||
446 | event->mask = mask; | 185 | event->mask = mask; |
447 | |||
448 | return event; | ||
449 | } | ||
450 | |||
451 | static __init int fsnotify_notification_init(void) | ||
452 | { | ||
453 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); | ||
454 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); | ||
455 | |||
456 | q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL, | ||
457 | FSNOTIFY_EVENT_NONE, NULL, 0, | ||
458 | GFP_KERNEL); | ||
459 | if (!q_overflow_event) | ||
460 | panic("unable to allocate fsnotify q_overflow_event\n"); | ||
461 | |||
462 | return 0; | ||
463 | } | 186 | } |
464 | subsys_initcall(fsnotify_notification_init); | ||
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index f17e58b32989..ce210d4951a1 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -38,7 +38,6 @@ ocfs2-objs := \ | |||
38 | symlink.o \ | 38 | symlink.o \ |
39 | sysfile.o \ | 39 | sysfile.o \ |
40 | uptodate.o \ | 40 | uptodate.o \ |
41 | ver.o \ | ||
42 | quota_local.o \ | 41 | quota_local.o \ |
43 | quota_global.o \ | 42 | quota_global.o \ |
44 | xattr.o \ | 43 | xattr.o \ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index dc7411fe185d..8750ae1b8636 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -7260,14 +7260,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
7260 | start = range->start >> osb->s_clustersize_bits; | 7260 | start = range->start >> osb->s_clustersize_bits; |
7261 | len = range->len >> osb->s_clustersize_bits; | 7261 | len = range->len >> osb->s_clustersize_bits; |
7262 | minlen = range->minlen >> osb->s_clustersize_bits; | 7262 | minlen = range->minlen >> osb->s_clustersize_bits; |
7263 | trimmed = 0; | ||
7264 | |||
7265 | if (!len) { | ||
7266 | range->len = 0; | ||
7267 | return 0; | ||
7268 | } | ||
7269 | 7263 | ||
7270 | if (minlen >= osb->bitmap_cpg) | 7264 | if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize) |
7271 | return -EINVAL; | 7265 | return -EINVAL; |
7272 | 7266 | ||
7273 | main_bm_inode = ocfs2_get_system_file_inode(osb, | 7267 | main_bm_inode = ocfs2_get_system_file_inode(osb, |
@@ -7293,6 +7287,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
7293 | goto out_unlock; | 7287 | goto out_unlock; |
7294 | } | 7288 | } |
7295 | 7289 | ||
7290 | len = range->len >> osb->s_clustersize_bits; | ||
7296 | if (start + len > le32_to_cpu(main_bm->i_clusters)) | 7291 | if (start + len > le32_to_cpu(main_bm->i_clusters)) |
7297 | len = le32_to_cpu(main_bm->i_clusters) - start; | 7292 | len = le32_to_cpu(main_bm->i_clusters) - start; |
7298 | 7293 | ||
@@ -7307,6 +7302,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
7307 | last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); | 7302 | last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); |
7308 | last_bit = osb->bitmap_cpg; | 7303 | last_bit = osb->bitmap_cpg; |
7309 | 7304 | ||
7305 | trimmed = 0; | ||
7310 | for (group = first_group; group <= last_group;) { | 7306 | for (group = first_group; group <= last_group;) { |
7311 | if (first_bit + len >= osb->bitmap_cpg) | 7307 | if (first_bit + len >= osb->bitmap_cpg) |
7312 | last_bit = osb->bitmap_cpg; | 7308 | last_bit = osb->bitmap_cpg; |
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile index bc8c5e7d8608..1aefc0350ec3 100644 --- a/fs/ocfs2/cluster/Makefile +++ b/fs/ocfs2/cluster/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o | 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o |
2 | 2 | ||
3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ | 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ |
4 | quorum.o tcp.o netdebug.o ver.o | 4 | quorum.o tcp.o netdebug.o |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index bb240647ca5f..441c84e169e6 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include "heartbeat.h" | 29 | #include "heartbeat.h" |
30 | #include "masklog.h" | 30 | #include "masklog.h" |
31 | #include "sys.h" | 31 | #include "sys.h" |
32 | #include "ver.h" | ||
33 | 32 | ||
34 | /* for now we operate under the assertion that there can be only one | 33 | /* for now we operate under the assertion that there can be only one |
35 | * cluster active at a time. Changing this will require trickling | 34 | * cluster active at a time. Changing this will require trickling |
@@ -945,8 +944,6 @@ static int __init init_o2nm(void) | |||
945 | { | 944 | { |
946 | int ret = -1; | 945 | int ret = -1; |
947 | 946 | ||
948 | cluster_print_version(); | ||
949 | |||
950 | ret = o2hb_init(); | 947 | ret = o2hb_init(); |
951 | if (ret) | 948 | if (ret) |
952 | goto out; | 949 | goto out; |
@@ -984,6 +981,7 @@ out: | |||
984 | 981 | ||
985 | MODULE_AUTHOR("Oracle"); | 982 | MODULE_AUTHOR("Oracle"); |
986 | MODULE_LICENSE("GPL"); | 983 | MODULE_LICENSE("GPL"); |
984 | MODULE_DESCRIPTION("OCFS2 cluster management"); | ||
987 | 985 | ||
988 | module_init(init_o2nm) | 986 | module_init(init_o2nm) |
989 | module_exit(exit_o2nm) | 987 | module_exit(exit_o2nm) |
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c deleted file mode 100644 index a56eee6abad3..000000000000 --- a/fs/ocfs2/cluster/ver.c +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "ver.h" | ||
30 | |||
31 | #define CLUSTER_BUILD_VERSION "1.5.0" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION | ||
34 | |||
35 | void cluster_print_version(void) | ||
36 | { | ||
37 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
38 | } | ||
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(CLUSTER_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/cluster/ver.h b/fs/ocfs2/cluster/ver.h deleted file mode 100644 index 32554c3382c2..000000000000 --- a/fs/ocfs2/cluster/ver.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef O2CLUSTER_VER_H | ||
27 | #define O2CLUSTER_VER_H | ||
28 | |||
29 | void cluster_print_version(void); | ||
30 | |||
31 | #endif /* O2CLUSTER_VER_H */ | ||
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index c8a044efbb15..bd1aab1f49a4 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile | |||
@@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2 | |||
3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o | 3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o |
4 | 4 | ||
5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ | 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ |
6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o | 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o |
7 | 7 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 8b3382abf840..33660a4a52fa 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -43,8 +43,6 @@ | |||
43 | #include "dlmdomain.h" | 43 | #include "dlmdomain.h" |
44 | #include "dlmdebug.h" | 44 | #include "dlmdebug.h" |
45 | 45 | ||
46 | #include "dlmver.h" | ||
47 | |||
48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) | 46 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) |
49 | #include "cluster/masklog.h" | 47 | #include "cluster/masklog.h" |
50 | 48 | ||
@@ -2328,8 +2326,6 @@ static int __init dlm_init(void) | |||
2328 | { | 2326 | { |
2329 | int status; | 2327 | int status; |
2330 | 2328 | ||
2331 | dlm_print_version(); | ||
2332 | |||
2333 | status = dlm_init_mle_cache(); | 2329 | status = dlm_init_mle_cache(); |
2334 | if (status) { | 2330 | if (status) { |
2335 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); | 2331 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); |
@@ -2379,6 +2375,7 @@ static void __exit dlm_exit (void) | |||
2379 | 2375 | ||
2380 | MODULE_AUTHOR("Oracle"); | 2376 | MODULE_AUTHOR("Oracle"); |
2381 | MODULE_LICENSE("GPL"); | 2377 | MODULE_LICENSE("GPL"); |
2378 | MODULE_DESCRIPTION("OCFS2 Distributed Lock Management"); | ||
2382 | 2379 | ||
2383 | module_init(dlm_init); | 2380 | module_init(dlm_init); |
2384 | module_exit(dlm_exit); | 2381 | module_exit(dlm_exit); |
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c deleted file mode 100644 index dfc0da4d158d..000000000000 --- a/fs/ocfs2/dlm/dlmver.c +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "dlmver.h" | ||
30 | |||
31 | #define DLM_BUILD_VERSION "1.5.0" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION | ||
34 | |||
35 | void dlm_print_version(void) | ||
36 | { | ||
37 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
38 | } | ||
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(DLM_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/dlm/dlmver.h b/fs/ocfs2/dlm/dlmver.h deleted file mode 100644 index f674aee77a16..000000000000 --- a/fs/ocfs2/dlm/dlmver.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef DLM_VER_H | ||
27 | #define DLM_VER_H | ||
28 | |||
29 | void dlm_print_version(void); | ||
30 | |||
31 | #endif /* DLM_VER_H */ | ||
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile index f14be89a6701..eed3db8c5b49 100644 --- a/fs/ocfs2/dlmfs/Makefile +++ b/fs/ocfs2/dlmfs/Makefile | |||
@@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2 | |||
2 | 2 | ||
3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o | 3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o |
4 | 4 | ||
5 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o | 5 | ocfs2_dlmfs-objs := userdlm.o dlmfs.o |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index efa2b3d339e3..09b7d9dac71d 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -49,7 +49,6 @@ | |||
49 | 49 | ||
50 | #include "stackglue.h" | 50 | #include "stackglue.h" |
51 | #include "userdlm.h" | 51 | #include "userdlm.h" |
52 | #include "dlmfsver.h" | ||
53 | 52 | ||
54 | #define MLOG_MASK_PREFIX ML_DLMFS | 53 | #define MLOG_MASK_PREFIX ML_DLMFS |
55 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
@@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void) | |||
644 | int status; | 643 | int status; |
645 | int cleanup_inode = 0, cleanup_worker = 0; | 644 | int cleanup_inode = 0, cleanup_worker = 0; |
646 | 645 | ||
647 | dlmfs_print_version(); | ||
648 | |||
649 | status = bdi_init(&dlmfs_backing_dev_info); | 646 | status = bdi_init(&dlmfs_backing_dev_info); |
650 | if (status) | 647 | if (status) |
651 | return status; | 648 | return status; |
@@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void) | |||
701 | 698 | ||
702 | MODULE_AUTHOR("Oracle"); | 699 | MODULE_AUTHOR("Oracle"); |
703 | MODULE_LICENSE("GPL"); | 700 | MODULE_LICENSE("GPL"); |
701 | MODULE_DESCRIPTION("OCFS2 DLM-Filesystem"); | ||
704 | 702 | ||
705 | module_init(init_dlmfs_fs) | 703 | module_init(init_dlmfs_fs) |
706 | module_exit(exit_dlmfs_fs) | 704 | module_exit(exit_dlmfs_fs) |
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c deleted file mode 100644 index a733b3321f83..000000000000 --- a/fs/ocfs2/dlmfs/dlmfsver.c +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmfsver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/kernel.h> | ||
28 | |||
29 | #include "dlmfsver.h" | ||
30 | |||
31 | #define DLM_BUILD_VERSION "1.5.0" | ||
32 | |||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | ||
34 | |||
35 | void dlmfs_print_version(void) | ||
36 | { | ||
37 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
38 | } | ||
39 | |||
40 | MODULE_DESCRIPTION(VERSION_STR); | ||
41 | |||
42 | MODULE_VERSION(DLM_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h deleted file mode 100644 index f35eadbed25c..000000000000 --- a/fs/ocfs2/dlmfs/dlmfsver.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * dlmver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef DLMFS_VER_H | ||
27 | #define DLMFS_VER_H | ||
28 | |||
29 | void dlmfs_print_version(void); | ||
30 | |||
31 | #endif /* DLMFS_VER_H */ | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3407b2c62b21..19986959d149 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2996,6 +2996,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2996 | 2996 | ||
2997 | /* for now, uuid == domain */ | 2997 | /* for now, uuid == domain */ |
2998 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, | 2998 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
2999 | osb->osb_cluster_name, | ||
3000 | strlen(osb->osb_cluster_name), | ||
2999 | osb->uuid_str, | 3001 | osb->uuid_str, |
3000 | strlen(osb->uuid_str), | 3002 | strlen(osb->uuid_str), |
3001 | &lproto, ocfs2_do_node_down, osb, | 3003 | &lproto, ocfs2_do_node_down, osb, |
@@ -3005,7 +3007,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
3005 | goto bail; | 3007 | goto bail; |
3006 | } | 3008 | } |
3007 | 3009 | ||
3008 | status = ocfs2_cluster_this_node(&osb->node_num); | 3010 | status = ocfs2_cluster_this_node(conn, &osb->node_num); |
3009 | if (status < 0) { | 3011 | if (status < 0) { |
3010 | mlog_errno(status); | 3012 | mlog_errno(status); |
3011 | mlog(ML_ERROR, | 3013 | mlog(ML_ERROR, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6fff128cad16..f42eecef6478 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1869,7 +1869,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1869 | } | 1869 | } |
1870 | size = sr->l_start + sr->l_len; | 1870 | size = sr->l_start + sr->l_len; |
1871 | 1871 | ||
1872 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | 1872 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 || |
1873 | cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) { | ||
1873 | if (sr->l_len <= 0) { | 1874 | if (sr->l_len <= 0) { |
1874 | ret = -EINVAL; | 1875 | ret = -EINVAL; |
1875 | goto out_inode_unlock; | 1876 | goto out_inode_unlock; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index fa32ce9b455d..8ca3c29accbf 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/blkdev.h> | ||
10 | #include <linux/compat.h> | 11 | #include <linux/compat.h> |
11 | 12 | ||
12 | #include <cluster/masklog.h> | 13 | #include <cluster/masklog.h> |
@@ -966,15 +967,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
966 | case FITRIM: | 967 | case FITRIM: |
967 | { | 968 | { |
968 | struct super_block *sb = inode->i_sb; | 969 | struct super_block *sb = inode->i_sb; |
970 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
969 | struct fstrim_range range; | 971 | struct fstrim_range range; |
970 | int ret = 0; | 972 | int ret = 0; |
971 | 973 | ||
972 | if (!capable(CAP_SYS_ADMIN)) | 974 | if (!capable(CAP_SYS_ADMIN)) |
973 | return -EPERM; | 975 | return -EPERM; |
974 | 976 | ||
977 | if (!blk_queue_discard(q)) | ||
978 | return -EOPNOTSUPP; | ||
979 | |||
975 | if (copy_from_user(&range, argp, sizeof(range))) | 980 | if (copy_from_user(&range, argp, sizeof(range))) |
976 | return -EFAULT; | 981 | return -EFAULT; |
977 | 982 | ||
983 | range.minlen = max_t(u64, q->limits.discard_granularity, | ||
984 | range.minlen); | ||
978 | ret = ocfs2_trim_fs(sb, &range); | 985 | ret = ocfs2_trim_fs(sb, &range); |
979 | if (ret < 0) | 986 | if (ret < 0) |
980 | return ret; | 987 | return ret; |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 631a98213474..64c304d668f0 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -561,83 +561,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, | |||
561 | mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); | 561 | mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); |
562 | } | 562 | } |
563 | 563 | ||
564 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
565 | handle_t *handle, | ||
566 | struct buffer_head *di_bh, | ||
567 | u32 num_bits, | ||
568 | u16 chain) | ||
569 | { | ||
570 | int ret; | ||
571 | u32 tmp_used; | ||
572 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
573 | struct ocfs2_chain_list *cl = | ||
574 | (struct ocfs2_chain_list *) &di->id2.i_chain; | ||
575 | |||
576 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
577 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
578 | if (ret < 0) { | ||
579 | mlog_errno(ret); | ||
580 | goto out; | ||
581 | } | ||
582 | |||
583 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
584 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | ||
585 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | ||
586 | ocfs2_journal_dirty(handle, di_bh); | ||
587 | |||
588 | out: | ||
589 | return ret; | ||
590 | } | ||
591 | |||
592 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | ||
593 | struct inode *alloc_inode, | ||
594 | struct ocfs2_group_desc *bg, | ||
595 | struct buffer_head *group_bh, | ||
596 | unsigned int bit_off, | ||
597 | unsigned int num_bits) | ||
598 | { | ||
599 | int status; | ||
600 | void *bitmap = bg->bg_bitmap; | ||
601 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
602 | |||
603 | /* All callers get the descriptor via | ||
604 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ | ||
605 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); | ||
606 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); | ||
607 | |||
608 | mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, | ||
609 | num_bits); | ||
610 | |||
611 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | ||
612 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
613 | |||
614 | status = ocfs2_journal_access_gd(handle, | ||
615 | INODE_CACHE(alloc_inode), | ||
616 | group_bh, | ||
617 | journal_type); | ||
618 | if (status < 0) { | ||
619 | mlog_errno(status); | ||
620 | goto bail; | ||
621 | } | ||
622 | |||
623 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | ||
624 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
625 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
626 | " count %u but claims %u are freed. num_bits %d", | ||
627 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
628 | le16_to_cpu(bg->bg_bits), | ||
629 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
630 | return -EROFS; | ||
631 | } | ||
632 | while (num_bits--) | ||
633 | ocfs2_set_bit(bit_off++, bitmap); | ||
634 | |||
635 | ocfs2_journal_dirty(handle, group_bh); | ||
636 | |||
637 | bail: | ||
638 | return status; | ||
639 | } | ||
640 | |||
641 | static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | 564 | static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, |
642 | u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, | 565 | u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, |
643 | u32 len, int ext_flags) | 566 | u32 len, int ext_flags) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3a903470c794..553f53cc73ae 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -387,6 +387,7 @@ struct ocfs2_super | |||
387 | u8 osb_stackflags; | 387 | u8 osb_stackflags; |
388 | 388 | ||
389 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 389 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
390 | char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1]; | ||
390 | struct ocfs2_cluster_connection *cconn; | 391 | struct ocfs2_cluster_connection *cconn; |
391 | struct ocfs2_lock_res osb_super_lockres; | 392 | struct ocfs2_lock_res osb_super_lockres; |
392 | struct ocfs2_lock_res osb_rename_lockres; | 393 | struct ocfs2_lock_res osb_rename_lockres; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index bf1f8930456f..1724d43d3da1 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) | |||
398 | return 0; | 398 | return 0; |
399 | } | 399 | } |
400 | 400 | ||
401 | static int o2cb_cluster_this_node(unsigned int *node) | 401 | static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn, |
402 | unsigned int *node) | ||
402 | { | 403 | { |
403 | int node_num; | 404 | int node_num; |
404 | 405 | ||
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 286edf1e231f..13a8537d8e8b 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
26 | #include <linux/sched.h> | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
28 | #include "stackglue.h" | 29 | #include "stackglue.h" |
@@ -102,6 +103,12 @@ | |||
102 | #define OCFS2_TEXT_UUID_LEN 32 | 103 | #define OCFS2_TEXT_UUID_LEN 32 |
103 | #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 | 104 | #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 |
104 | #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 | 105 | #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 |
106 | #define VERSION_LOCK "version_lock" | ||
107 | |||
108 | enum ocfs2_connection_type { | ||
109 | WITH_CONTROLD, | ||
110 | NO_CONTROLD | ||
111 | }; | ||
105 | 112 | ||
106 | /* | 113 | /* |
107 | * ocfs2_live_connection is refcounted because the filesystem and | 114 | * ocfs2_live_connection is refcounted because the filesystem and |
@@ -110,6 +117,13 @@ | |||
110 | struct ocfs2_live_connection { | 117 | struct ocfs2_live_connection { |
111 | struct list_head oc_list; | 118 | struct list_head oc_list; |
112 | struct ocfs2_cluster_connection *oc_conn; | 119 | struct ocfs2_cluster_connection *oc_conn; |
120 | enum ocfs2_connection_type oc_type; | ||
121 | atomic_t oc_this_node; | ||
122 | int oc_our_slot; | ||
123 | struct dlm_lksb oc_version_lksb; | ||
124 | char oc_lvb[DLM_LVB_LEN]; | ||
125 | struct completion oc_sync_wait; | ||
126 | wait_queue_head_t oc_wait; | ||
113 | }; | 127 | }; |
114 | 128 | ||
115 | struct ocfs2_control_private { | 129 | struct ocfs2_control_private { |
@@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) | |||
198 | * mount path. Since the VFS prevents multiple calls to | 212 | * mount path. Since the VFS prevents multiple calls to |
199 | * fill_super(), we can't get dupes here. | 213 | * fill_super(), we can't get dupes here. |
200 | */ | 214 | */ |
201 | static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, | 215 | static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn, |
202 | struct ocfs2_live_connection **c_ret) | 216 | struct ocfs2_live_connection *c) |
203 | { | 217 | { |
204 | int rc = 0; | 218 | int rc = 0; |
205 | struct ocfs2_live_connection *c; | ||
206 | |||
207 | c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | ||
208 | if (!c) | ||
209 | return -ENOMEM; | ||
210 | 219 | ||
211 | mutex_lock(&ocfs2_control_lock); | 220 | mutex_lock(&ocfs2_control_lock); |
212 | c->oc_conn = conn; | 221 | c->oc_conn = conn; |
213 | 222 | ||
214 | if (atomic_read(&ocfs2_control_opened)) | 223 | if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened)) |
215 | list_add(&c->oc_list, &ocfs2_live_connection_list); | 224 | list_add(&c->oc_list, &ocfs2_live_connection_list); |
216 | else { | 225 | else { |
217 | printk(KERN_ERR | 226 | printk(KERN_ERR |
@@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, | |||
220 | } | 229 | } |
221 | 230 | ||
222 | mutex_unlock(&ocfs2_control_lock); | 231 | mutex_unlock(&ocfs2_control_lock); |
223 | |||
224 | if (!rc) | ||
225 | *c_ret = c; | ||
226 | else | ||
227 | kfree(c); | ||
228 | |||
229 | return rc; | 232 | return rc; |
230 | } | 233 | } |
231 | 234 | ||
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing, | |||
799 | return 0; | 802 | return 0; |
800 | } | 803 | } |
801 | 804 | ||
805 | static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver) | ||
806 | { | ||
807 | struct ocfs2_protocol_version *pv = | ||
808 | (struct ocfs2_protocol_version *)lvb; | ||
809 | /* | ||
810 | * ocfs2_protocol_version has two u8 variables, so we don't | ||
811 | * need any endian conversion. | ||
812 | */ | ||
813 | ver->pv_major = pv->pv_major; | ||
814 | ver->pv_minor = pv->pv_minor; | ||
815 | } | ||
816 | |||
817 | static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb) | ||
818 | { | ||
819 | struct ocfs2_protocol_version *pv = | ||
820 | (struct ocfs2_protocol_version *)lvb; | ||
821 | /* | ||
822 | * ocfs2_protocol_version has two u8 variables, so we don't | ||
823 | * need any endian conversion. | ||
824 | */ | ||
825 | pv->pv_major = ver->pv_major; | ||
826 | pv->pv_minor = ver->pv_minor; | ||
827 | } | ||
828 | |||
829 | static void sync_wait_cb(void *arg) | ||
830 | { | ||
831 | struct ocfs2_cluster_connection *conn = arg; | ||
832 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
833 | complete(&lc->oc_sync_wait); | ||
834 | } | ||
835 | |||
836 | static int sync_unlock(struct ocfs2_cluster_connection *conn, | ||
837 | struct dlm_lksb *lksb, char *name) | ||
838 | { | ||
839 | int error; | ||
840 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
841 | |||
842 | error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn); | ||
843 | if (error) { | ||
844 | printk(KERN_ERR "%s lkid %x error %d\n", | ||
845 | name, lksb->sb_lkid, error); | ||
846 | return error; | ||
847 | } | ||
848 | |||
849 | wait_for_completion(&lc->oc_sync_wait); | ||
850 | |||
851 | if (lksb->sb_status != -DLM_EUNLOCK) { | ||
852 | printk(KERN_ERR "%s lkid %x status %d\n", | ||
853 | name, lksb->sb_lkid, lksb->sb_status); | ||
854 | return -1; | ||
855 | } | ||
856 | return 0; | ||
857 | } | ||
858 | |||
859 | static int sync_lock(struct ocfs2_cluster_connection *conn, | ||
860 | int mode, uint32_t flags, | ||
861 | struct dlm_lksb *lksb, char *name) | ||
862 | { | ||
863 | int error, status; | ||
864 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
865 | |||
866 | error = dlm_lock(conn->cc_lockspace, mode, lksb, flags, | ||
867 | name, strlen(name), | ||
868 | 0, sync_wait_cb, conn, NULL); | ||
869 | if (error) { | ||
870 | printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n", | ||
871 | name, lksb->sb_lkid, flags, mode, error); | ||
872 | return error; | ||
873 | } | ||
874 | |||
875 | wait_for_completion(&lc->oc_sync_wait); | ||
876 | |||
877 | status = lksb->sb_status; | ||
878 | |||
879 | if (status && status != -EAGAIN) { | ||
880 | printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n", | ||
881 | name, lksb->sb_lkid, flags, mode, status); | ||
882 | } | ||
883 | |||
884 | return status; | ||
885 | } | ||
886 | |||
887 | |||
888 | static int version_lock(struct ocfs2_cluster_connection *conn, int mode, | ||
889 | int flags) | ||
890 | { | ||
891 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
892 | return sync_lock(conn, mode, flags, | ||
893 | &lc->oc_version_lksb, VERSION_LOCK); | ||
894 | } | ||
895 | |||
896 | static int version_unlock(struct ocfs2_cluster_connection *conn) | ||
897 | { | ||
898 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
899 | return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK); | ||
900 | } | ||
901 | |||
902 | /* get_protocol_version() | ||
903 | * | ||
904 | * To exchange ocfs2 versioning, we use the LVB of the version dlm lock. | ||
905 | * The algorithm is: | ||
906 | * 1. Attempt to take the lock in EX mode (non-blocking). | ||
907 | * 2. If successful (which means it is the first mount), write the | ||
908 | * version number and downconvert to PR lock. | ||
909 | * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after | ||
910 | * taking the PR lock. | ||
911 | */ | ||
912 | |||
913 | static int get_protocol_version(struct ocfs2_cluster_connection *conn) | ||
914 | { | ||
915 | int ret; | ||
916 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
917 | struct ocfs2_protocol_version pv; | ||
918 | |||
919 | running_proto.pv_major = | ||
920 | ocfs2_user_plugin.sp_max_proto.pv_major; | ||
921 | running_proto.pv_minor = | ||
922 | ocfs2_user_plugin.sp_max_proto.pv_minor; | ||
923 | |||
924 | lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb; | ||
925 | ret = version_lock(conn, DLM_LOCK_EX, | ||
926 | DLM_LKF_VALBLK|DLM_LKF_NOQUEUE); | ||
927 | if (!ret) { | ||
928 | conn->cc_version.pv_major = running_proto.pv_major; | ||
929 | conn->cc_version.pv_minor = running_proto.pv_minor; | ||
930 | version_to_lvb(&running_proto, lc->oc_lvb); | ||
931 | version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK); | ||
932 | } else if (ret == -EAGAIN) { | ||
933 | ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK); | ||
934 | if (ret) | ||
935 | goto out; | ||
936 | lvb_to_version(lc->oc_lvb, &pv); | ||
937 | |||
938 | if ((pv.pv_major != running_proto.pv_major) || | ||
939 | (pv.pv_minor > running_proto.pv_minor)) { | ||
940 | ret = -EINVAL; | ||
941 | goto out; | ||
942 | } | ||
943 | |||
944 | conn->cc_version.pv_major = pv.pv_major; | ||
945 | conn->cc_version.pv_minor = pv.pv_minor; | ||
946 | } | ||
947 | out: | ||
948 | return ret; | ||
949 | } | ||
950 | |||
951 | static void user_recover_prep(void *arg) | ||
952 | { | ||
953 | } | ||
954 | |||
955 | static void user_recover_slot(void *arg, struct dlm_slot *slot) | ||
956 | { | ||
957 | struct ocfs2_cluster_connection *conn = arg; | ||
958 | printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n", | ||
959 | slot->nodeid, slot->slot); | ||
960 | conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data); | ||
961 | |||
962 | } | ||
963 | |||
964 | static void user_recover_done(void *arg, struct dlm_slot *slots, | ||
965 | int num_slots, int our_slot, | ||
966 | uint32_t generation) | ||
967 | { | ||
968 | struct ocfs2_cluster_connection *conn = arg; | ||
969 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
970 | int i; | ||
971 | |||
972 | for (i = 0; i < num_slots; i++) | ||
973 | if (slots[i].slot == our_slot) { | ||
974 | atomic_set(&lc->oc_this_node, slots[i].nodeid); | ||
975 | break; | ||
976 | } | ||
977 | |||
978 | lc->oc_our_slot = our_slot; | ||
979 | wake_up(&lc->oc_wait); | ||
980 | } | ||
981 | |||
982 | static const struct dlm_lockspace_ops ocfs2_ls_ops = { | ||
983 | .recover_prep = user_recover_prep, | ||
984 | .recover_slot = user_recover_slot, | ||
985 | .recover_done = user_recover_done, | ||
986 | }; | ||
987 | |||
988 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) | ||
989 | { | ||
990 | version_unlock(conn); | ||
991 | dlm_release_lockspace(conn->cc_lockspace, 2); | ||
992 | conn->cc_lockspace = NULL; | ||
993 | ocfs2_live_connection_drop(conn->cc_private); | ||
994 | conn->cc_private = NULL; | ||
995 | return 0; | ||
996 | } | ||
997 | |||
802 | static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | 998 | static int user_cluster_connect(struct ocfs2_cluster_connection *conn) |
803 | { | 999 | { |
804 | dlm_lockspace_t *fsdlm; | 1000 | dlm_lockspace_t *fsdlm; |
805 | struct ocfs2_live_connection *uninitialized_var(control); | 1001 | struct ocfs2_live_connection *lc; |
806 | int rc = 0; | 1002 | int rc, ops_rv; |
807 | 1003 | ||
808 | BUG_ON(conn == NULL); | 1004 | BUG_ON(conn == NULL); |
809 | 1005 | ||
810 | rc = ocfs2_live_connection_new(conn, &control); | 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); |
1007 | if (!lc) { | ||
1008 | rc = -ENOMEM; | ||
1009 | goto out; | ||
1010 | } | ||
1011 | |||
1012 | init_waitqueue_head(&lc->oc_wait); | ||
1013 | init_completion(&lc->oc_sync_wait); | ||
1014 | atomic_set(&lc->oc_this_node, 0); | ||
1015 | conn->cc_private = lc; | ||
1016 | lc->oc_type = NO_CONTROLD; | ||
1017 | |||
1018 | rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name, | ||
1019 | DLM_LSFL_FS, DLM_LVB_LEN, | ||
1020 | &ocfs2_ls_ops, conn, &ops_rv, &fsdlm); | ||
1021 | if (rc) | ||
1022 | goto out; | ||
1023 | |||
1024 | if (ops_rv == -EOPNOTSUPP) { | ||
1025 | lc->oc_type = WITH_CONTROLD; | ||
1026 | printk(KERN_NOTICE "ocfs2: You seem to be using an older " | ||
1027 | "version of dlm_controld and/or ocfs2-tools." | ||
1028 | " Please consider upgrading.\n"); | ||
1029 | } else if (ops_rv) { | ||
1030 | rc = ops_rv; | ||
1031 | goto out; | ||
1032 | } | ||
1033 | conn->cc_lockspace = fsdlm; | ||
1034 | |||
1035 | rc = ocfs2_live_connection_attach(conn, lc); | ||
811 | if (rc) | 1036 | if (rc) |
812 | goto out; | 1037 | goto out; |
813 | 1038 | ||
1039 | if (lc->oc_type == NO_CONTROLD) { | ||
1040 | rc = get_protocol_version(conn); | ||
1041 | if (rc) { | ||
1042 | printk(KERN_ERR "ocfs2: Could not determine" | ||
1043 | " locking version\n"); | ||
1044 | user_cluster_disconnect(conn); | ||
1045 | goto out; | ||
1046 | } | ||
1047 | wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0)); | ||
1048 | } | ||
1049 | |||
814 | /* | 1050 | /* |
815 | * running_proto must have been set before we allowed any mounts | 1051 | * running_proto must have been set before we allowed any mounts |
816 | * to proceed. | 1052 | * to proceed. |
@@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
818 | if (fs_protocol_compare(&running_proto, &conn->cc_version)) { | 1054 | if (fs_protocol_compare(&running_proto, &conn->cc_version)) { |
819 | printk(KERN_ERR | 1055 | printk(KERN_ERR |
820 | "Unable to mount with fs locking protocol version " | 1056 | "Unable to mount with fs locking protocol version " |
821 | "%u.%u because the userspace control daemon has " | 1057 | "%u.%u because negotiated protocol is %u.%u\n", |
822 | "negotiated %u.%u\n", | ||
823 | conn->cc_version.pv_major, conn->cc_version.pv_minor, | 1058 | conn->cc_version.pv_major, conn->cc_version.pv_minor, |
824 | running_proto.pv_major, running_proto.pv_minor); | 1059 | running_proto.pv_major, running_proto.pv_minor); |
825 | rc = -EPROTO; | 1060 | rc = -EPROTO; |
826 | ocfs2_live_connection_drop(control); | 1061 | ocfs2_live_connection_drop(lc); |
827 | goto out; | 1062 | lc = NULL; |
828 | } | ||
829 | |||
830 | rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, | ||
831 | NULL, NULL, NULL, &fsdlm); | ||
832 | if (rc) { | ||
833 | ocfs2_live_connection_drop(control); | ||
834 | goto out; | ||
835 | } | 1063 | } |
836 | 1064 | ||
837 | conn->cc_private = control; | ||
838 | conn->cc_lockspace = fsdlm; | ||
839 | out: | 1065 | out: |
1066 | if (rc && lc) | ||
1067 | kfree(lc); | ||
840 | return rc; | 1068 | return rc; |
841 | } | 1069 | } |
842 | 1070 | ||
843 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) | ||
844 | { | ||
845 | dlm_release_lockspace(conn->cc_lockspace, 2); | ||
846 | conn->cc_lockspace = NULL; | ||
847 | ocfs2_live_connection_drop(conn->cc_private); | ||
848 | conn->cc_private = NULL; | ||
849 | return 0; | ||
850 | } | ||
851 | 1071 | ||
852 | static int user_cluster_this_node(unsigned int *this_node) | 1072 | static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, |
1073 | unsigned int *this_node) | ||
853 | { | 1074 | { |
854 | int rc; | 1075 | int rc; |
1076 | struct ocfs2_live_connection *lc = conn->cc_private; | ||
1077 | |||
1078 | if (lc->oc_type == WITH_CONTROLD) | ||
1079 | rc = ocfs2_control_get_this_node(); | ||
1080 | else if (lc->oc_type == NO_CONTROLD) | ||
1081 | rc = atomic_read(&lc->oc_this_node); | ||
1082 | else | ||
1083 | rc = -EINVAL; | ||
855 | 1084 | ||
856 | rc = ocfs2_control_get_this_node(); | ||
857 | if (rc < 0) | 1085 | if (rc < 0) |
858 | return rc; | 1086 | return rc; |
859 | 1087 | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index cb7ec0b63ddc..1324e6600e57 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | |||
309 | EXPORT_SYMBOL_GPL(ocfs2_plock); | 309 | EXPORT_SYMBOL_GPL(ocfs2_plock); |
310 | 310 | ||
311 | int ocfs2_cluster_connect(const char *stack_name, | 311 | int ocfs2_cluster_connect(const char *stack_name, |
312 | const char *cluster_name, | ||
313 | int cluster_name_len, | ||
312 | const char *group, | 314 | const char *group, |
313 | int grouplen, | 315 | int grouplen, |
314 | struct ocfs2_locking_protocol *lproto, | 316 | struct ocfs2_locking_protocol *lproto, |
@@ -342,8 +344,10 @@ int ocfs2_cluster_connect(const char *stack_name, | |||
342 | goto out; | 344 | goto out; |
343 | } | 345 | } |
344 | 346 | ||
345 | memcpy(new_conn->cc_name, group, grouplen); | 347 | strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); |
346 | new_conn->cc_namelen = grouplen; | 348 | new_conn->cc_namelen = grouplen; |
349 | strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); | ||
350 | new_conn->cc_cluster_name_len = cluster_name_len; | ||
347 | new_conn->cc_recovery_handler = recovery_handler; | 351 | new_conn->cc_recovery_handler = recovery_handler; |
348 | new_conn->cc_recovery_data = recovery_data; | 352 | new_conn->cc_recovery_data = recovery_data; |
349 | 353 | ||
@@ -386,8 +390,9 @@ int ocfs2_cluster_connect_agnostic(const char *group, | |||
386 | 390 | ||
387 | if (cluster_stack_name[0]) | 391 | if (cluster_stack_name[0]) |
388 | stack_name = cluster_stack_name; | 392 | stack_name = cluster_stack_name; |
389 | return ocfs2_cluster_connect(stack_name, group, grouplen, lproto, | 393 | return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen, |
390 | recovery_handler, recovery_data, conn); | 394 | lproto, recovery_handler, recovery_data, |
395 | conn); | ||
391 | } | 396 | } |
392 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic); | 397 | EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic); |
393 | 398 | ||
@@ -460,9 +465,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) | |||
460 | } | 465 | } |
461 | EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); | 466 | EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); |
462 | 467 | ||
463 | int ocfs2_cluster_this_node(unsigned int *node) | 468 | int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn, |
469 | unsigned int *node) | ||
464 | { | 470 | { |
465 | return active_stack->sp_ops->this_node(node); | 471 | return active_stack->sp_ops->this_node(conn, node); |
466 | } | 472 | } |
467 | EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); | 473 | EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); |
468 | 474 | ||
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 1ec56fdb8d0d..66334a30cea8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
@@ -45,6 +45,9 @@ struct file_lock; | |||
45 | */ | 45 | */ |
46 | #define GROUP_NAME_MAX 64 | 46 | #define GROUP_NAME_MAX 64 |
47 | 47 | ||
48 | /* This shadows OCFS2_CLUSTER_NAME_LEN */ | ||
49 | #define CLUSTER_NAME_MAX 16 | ||
50 | |||
48 | 51 | ||
49 | /* | 52 | /* |
50 | * ocfs2_protocol_version changes when ocfs2 does something different in | 53 | * ocfs2_protocol_version changes when ocfs2 does something different in |
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol { | |||
97 | * locking compatibility. | 100 | * locking compatibility. |
98 | */ | 101 | */ |
99 | struct ocfs2_cluster_connection { | 102 | struct ocfs2_cluster_connection { |
100 | char cc_name[GROUP_NAME_MAX]; | 103 | char cc_name[GROUP_NAME_MAX + 1]; |
101 | int cc_namelen; | 104 | int cc_namelen; |
105 | char cc_cluster_name[CLUSTER_NAME_MAX + 1]; | ||
106 | int cc_cluster_name_len; | ||
102 | struct ocfs2_protocol_version cc_version; | 107 | struct ocfs2_protocol_version cc_version; |
103 | struct ocfs2_locking_protocol *cc_proto; | 108 | struct ocfs2_locking_protocol *cc_proto; |
104 | void (*cc_recovery_handler)(int node_num, void *recovery_data); | 109 | void (*cc_recovery_handler)(int node_num, void *recovery_data); |
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations { | |||
152 | * ->this_node() returns the cluster's unique identifier for the | 157 | * ->this_node() returns the cluster's unique identifier for the |
153 | * local node. | 158 | * local node. |
154 | */ | 159 | */ |
155 | int (*this_node)(unsigned int *node); | 160 | int (*this_node)(struct ocfs2_cluster_connection *conn, |
161 | unsigned int *node); | ||
156 | 162 | ||
157 | /* | 163 | /* |
158 | * Call the underlying dlm lock function. The ->dlm_lock() | 164 | * Call the underlying dlm lock function. The ->dlm_lock() |
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin { | |||
239 | 245 | ||
240 | /* Used by the filesystem */ | 246 | /* Used by the filesystem */ |
241 | int ocfs2_cluster_connect(const char *stack_name, | 247 | int ocfs2_cluster_connect(const char *stack_name, |
248 | const char *cluster_name, | ||
249 | int cluster_name_len, | ||
242 | const char *group, | 250 | const char *group, |
243 | int grouplen, | 251 | int grouplen, |
244 | struct ocfs2_locking_protocol *lproto, | 252 | struct ocfs2_locking_protocol *lproto, |
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group, | |||
260 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | 268 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, |
261 | int hangup_pending); | 269 | int hangup_pending); |
262 | void ocfs2_cluster_hangup(const char *group, int grouplen); | 270 | void ocfs2_cluster_hangup(const char *group, int grouplen); |
263 | int ocfs2_cluster_this_node(unsigned int *node); | 271 | int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn, |
272 | unsigned int *node); | ||
264 | 273 | ||
265 | struct ocfs2_lock_res; | 274 | struct ocfs2_lock_res; |
266 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | 275 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 2c91452c4047..47ae2663a6f5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
113 | struct ocfs2_suballoc_result *res); | 113 | struct ocfs2_suballoc_result *res); |
114 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, | 114 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, |
115 | int nr); | 115 | int nr); |
116 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | ||
117 | struct inode *alloc_inode, | ||
118 | struct ocfs2_group_desc *bg, | ||
119 | struct buffer_head *group_bh, | ||
120 | unsigned int bit_off, | ||
121 | unsigned int num_bits); | ||
122 | static int ocfs2_relink_block_group(handle_t *handle, | 116 | static int ocfs2_relink_block_group(handle_t *handle, |
123 | struct inode *alloc_inode, | 117 | struct inode *alloc_inode, |
124 | struct buffer_head *fe_bh, | 118 | struct buffer_head *fe_bh, |
@@ -1343,7 +1337,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, | |||
1343 | return status; | 1337 | return status; |
1344 | } | 1338 | } |
1345 | 1339 | ||
1346 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | 1340 | int ocfs2_block_group_set_bits(handle_t *handle, |
1347 | struct inode *alloc_inode, | 1341 | struct inode *alloc_inode, |
1348 | struct ocfs2_group_desc *bg, | 1342 | struct ocfs2_group_desc *bg, |
1349 | struct buffer_head *group_bh, | 1343 | struct buffer_head *group_bh, |
@@ -1388,8 +1382,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
1388 | ocfs2_journal_dirty(handle, group_bh); | 1382 | ocfs2_journal_dirty(handle, group_bh); |
1389 | 1383 | ||
1390 | bail: | 1384 | bail: |
1391 | if (status) | ||
1392 | mlog_errno(status); | ||
1393 | return status; | 1385 | return status; |
1394 | } | 1386 | } |
1395 | 1387 | ||
@@ -1588,7 +1580,7 @@ static int ocfs2_block_group_search(struct inode *inode, | |||
1588 | return ret; | 1580 | return ret; |
1589 | } | 1581 | } |
1590 | 1582 | ||
1591 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | 1583 | int ocfs2_alloc_dinode_update_counts(struct inode *inode, |
1592 | handle_t *handle, | 1584 | handle_t *handle, |
1593 | struct buffer_head *di_bh, | 1585 | struct buffer_head *di_bh, |
1594 | u32 num_bits, | 1586 | u32 num_bits, |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a36d0aa50911..218d8036b3e7 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -86,6 +86,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |||
86 | u32 bits_wanted, | 86 | u32 bits_wanted, |
87 | struct ocfs2_alloc_context **ac); | 87 | struct ocfs2_alloc_context **ac); |
88 | 88 | ||
89 | int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
90 | handle_t *handle, | ||
91 | struct buffer_head *di_bh, | ||
92 | u32 num_bits, | ||
93 | u16 chain); | ||
94 | int ocfs2_block_group_set_bits(handle_t *handle, | ||
95 | struct inode *alloc_inode, | ||
96 | struct ocfs2_group_desc *bg, | ||
97 | struct buffer_head *group_bh, | ||
98 | unsigned int bit_off, | ||
99 | unsigned int num_bits); | ||
100 | |||
89 | int ocfs2_claim_metadata(handle_t *handle, | 101 | int ocfs2_claim_metadata(handle_t *handle, |
90 | struct ocfs2_alloc_context *ac, | 102 | struct ocfs2_alloc_context *ac, |
91 | u32 bits_wanted, | 103 | u32 bits_wanted, |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c41492957aa5..49d84f80f36c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -68,7 +68,6 @@ | |||
68 | #include "super.h" | 68 | #include "super.h" |
69 | #include "sysfile.h" | 69 | #include "sysfile.h" |
70 | #include "uptodate.h" | 70 | #include "uptodate.h" |
71 | #include "ver.h" | ||
72 | #include "xattr.h" | 71 | #include "xattr.h" |
73 | #include "quota.h" | 72 | #include "quota.h" |
74 | #include "refcounttree.h" | 73 | #include "refcounttree.h" |
@@ -90,6 +89,7 @@ static struct dentry *ocfs2_debugfs_root = NULL; | |||
90 | 89 | ||
91 | MODULE_AUTHOR("Oracle"); | 90 | MODULE_AUTHOR("Oracle"); |
92 | MODULE_LICENSE("GPL"); | 91 | MODULE_LICENSE("GPL"); |
92 | MODULE_DESCRIPTION("OCFS2 cluster file system"); | ||
93 | 93 | ||
94 | struct mount_options | 94 | struct mount_options |
95 | { | 95 | { |
@@ -1618,8 +1618,6 @@ static int __init ocfs2_init(void) | |||
1618 | { | 1618 | { |
1619 | int status, i; | 1619 | int status, i; |
1620 | 1620 | ||
1621 | ocfs2_print_version(); | ||
1622 | |||
1623 | for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) | 1621 | for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) |
1624 | init_waitqueue_head(&ocfs2__ioend_wq[i]); | 1622 | init_waitqueue_head(&ocfs2__ioend_wq[i]); |
1625 | 1623 | ||
@@ -1947,11 +1945,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1947 | 1945 | ||
1948 | ocfs2_shutdown_local_alloc(osb); | 1946 | ocfs2_shutdown_local_alloc(osb); |
1949 | 1947 | ||
1950 | ocfs2_truncate_log_shutdown(osb); | ||
1951 | |||
1952 | /* This will disable recovery and flush any recovery work. */ | 1948 | /* This will disable recovery and flush any recovery work. */ |
1953 | ocfs2_recovery_exit(osb); | 1949 | ocfs2_recovery_exit(osb); |
1954 | 1950 | ||
1951 | /* | ||
1952 | * During dismount, when it recovers another node it will call | ||
1953 | * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq. | ||
1954 | */ | ||
1955 | ocfs2_truncate_log_shutdown(osb); | ||
1956 | |||
1955 | ocfs2_journal_shutdown(osb); | 1957 | ocfs2_journal_shutdown(osb); |
1956 | 1958 | ||
1957 | ocfs2_sync_blockdev(sb); | 1959 | ocfs2_sync_blockdev(sb); |
@@ -2225,10 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2225 | if (ocfs2_clusterinfo_valid(osb)) { | 2227 | if (ocfs2_clusterinfo_valid(osb)) { |
2226 | osb->osb_stackflags = | 2228 | osb->osb_stackflags = |
2227 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; | 2229 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; |
2228 | memcpy(osb->osb_cluster_stack, | 2230 | strlcpy(osb->osb_cluster_stack, |
2229 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | 2231 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, |
2230 | OCFS2_STACK_LABEL_LEN); | 2232 | OCFS2_STACK_LABEL_LEN + 1); |
2231 | osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
2232 | if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { | 2233 | if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { |
2233 | mlog(ML_ERROR, | 2234 | mlog(ML_ERROR, |
2234 | "couldn't mount because of an invalid " | 2235 | "couldn't mount because of an invalid " |
@@ -2237,6 +2238,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2237 | status = -EINVAL; | 2238 | status = -EINVAL; |
2238 | goto bail; | 2239 | goto bail; |
2239 | } | 2240 | } |
2241 | strlcpy(osb->osb_cluster_name, | ||
2242 | OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, | ||
2243 | OCFS2_CLUSTER_NAME_LEN + 1); | ||
2240 | } else { | 2244 | } else { |
2241 | /* The empty string is identical with classic tools that | 2245 | /* The empty string is identical with classic tools that |
2242 | * don't know about s_cluster_info. */ | 2246 | * don't know about s_cluster_info. */ |
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c deleted file mode 100644 index e2488f4128a2..000000000000 --- a/fs/ocfs2/ver.c +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ver.c | ||
5 | * | ||
6 | * version string | ||
7 | * | ||
8 | * Copyright (C) 2002, 2005 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/string.h> | ||
28 | #include <linux/kernel.h> | ||
29 | |||
30 | #include "ver.h" | ||
31 | |||
32 | #define OCFS2_BUILD_VERSION "1.5.0" | ||
33 | |||
34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION | ||
35 | |||
36 | void ocfs2_print_version(void) | ||
37 | { | ||
38 | printk(KERN_INFO "%s\n", VERSION_STR); | ||
39 | } | ||
40 | |||
41 | MODULE_DESCRIPTION(VERSION_STR); | ||
42 | |||
43 | MODULE_VERSION(OCFS2_BUILD_VERSION); | ||
diff --git a/fs/ocfs2/ver.h b/fs/ocfs2/ver.h deleted file mode 100644 index d7395cb91d2f..000000000000 --- a/fs/ocfs2/ver.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ver.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef OCFS2_VER_H | ||
27 | #define OCFS2_VER_H | ||
28 | |||
29 | void ocfs2_print_version(void); | ||
30 | |||
31 | #endif /* OCFS2_VER_H */ | ||
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 8bd2135b7f82..021e7c069b86 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -22,11 +22,80 @@ | |||
22 | 22 | ||
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | 24 | ||
25 | EXPORT_SYMBOL(posix_acl_init); | 25 | struct posix_acl **acl_by_type(struct inode *inode, int type) |
26 | EXPORT_SYMBOL(posix_acl_alloc); | 26 | { |
27 | EXPORT_SYMBOL(posix_acl_valid); | 27 | switch (type) { |
28 | EXPORT_SYMBOL(posix_acl_equiv_mode); | 28 | case ACL_TYPE_ACCESS: |
29 | EXPORT_SYMBOL(posix_acl_from_mode); | 29 | return &inode->i_acl; |
30 | case ACL_TYPE_DEFAULT: | ||
31 | return &inode->i_default_acl; | ||
32 | default: | ||
33 | BUG(); | ||
34 | } | ||
35 | } | ||
36 | EXPORT_SYMBOL(acl_by_type); | ||
37 | |||
38 | struct posix_acl *get_cached_acl(struct inode *inode, int type) | ||
39 | { | ||
40 | struct posix_acl **p = acl_by_type(inode, type); | ||
41 | struct posix_acl *acl = ACCESS_ONCE(*p); | ||
42 | if (acl) { | ||
43 | spin_lock(&inode->i_lock); | ||
44 | acl = *p; | ||
45 | if (acl != ACL_NOT_CACHED) | ||
46 | acl = posix_acl_dup(acl); | ||
47 | spin_unlock(&inode->i_lock); | ||
48 | } | ||
49 | return acl; | ||
50 | } | ||
51 | EXPORT_SYMBOL(get_cached_acl); | ||
52 | |||
53 | struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) | ||
54 | { | ||
55 | return rcu_dereference(*acl_by_type(inode, type)); | ||
56 | } | ||
57 | EXPORT_SYMBOL(get_cached_acl_rcu); | ||
58 | |||
59 | void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) | ||
60 | { | ||
61 | struct posix_acl **p = acl_by_type(inode, type); | ||
62 | struct posix_acl *old; | ||
63 | spin_lock(&inode->i_lock); | ||
64 | old = *p; | ||
65 | rcu_assign_pointer(*p, posix_acl_dup(acl)); | ||
66 | spin_unlock(&inode->i_lock); | ||
67 | if (old != ACL_NOT_CACHED) | ||
68 | posix_acl_release(old); | ||
69 | } | ||
70 | EXPORT_SYMBOL(set_cached_acl); | ||
71 | |||
72 | void forget_cached_acl(struct inode *inode, int type) | ||
73 | { | ||
74 | struct posix_acl **p = acl_by_type(inode, type); | ||
75 | struct posix_acl *old; | ||
76 | spin_lock(&inode->i_lock); | ||
77 | old = *p; | ||
78 | *p = ACL_NOT_CACHED; | ||
79 | spin_unlock(&inode->i_lock); | ||
80 | if (old != ACL_NOT_CACHED) | ||
81 | posix_acl_release(old); | ||
82 | } | ||
83 | EXPORT_SYMBOL(forget_cached_acl); | ||
84 | |||
85 | void forget_all_cached_acls(struct inode *inode) | ||
86 | { | ||
87 | struct posix_acl *old_access, *old_default; | ||
88 | spin_lock(&inode->i_lock); | ||
89 | old_access = inode->i_acl; | ||
90 | old_default = inode->i_default_acl; | ||
91 | inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; | ||
92 | spin_unlock(&inode->i_lock); | ||
93 | if (old_access != ACL_NOT_CACHED) | ||
94 | posix_acl_release(old_access); | ||
95 | if (old_default != ACL_NOT_CACHED) | ||
96 | posix_acl_release(old_default); | ||
97 | } | ||
98 | EXPORT_SYMBOL(forget_all_cached_acls); | ||
30 | 99 | ||
31 | /* | 100 | /* |
32 | * Init a fresh posix_acl | 101 | * Init a fresh posix_acl |
@@ -37,6 +106,7 @@ posix_acl_init(struct posix_acl *acl, int count) | |||
37 | atomic_set(&acl->a_refcount, 1); | 106 | atomic_set(&acl->a_refcount, 1); |
38 | acl->a_count = count; | 107 | acl->a_count = count; |
39 | } | 108 | } |
109 | EXPORT_SYMBOL(posix_acl_init); | ||
40 | 110 | ||
41 | /* | 111 | /* |
42 | * Allocate a new ACL with the specified number of entries. | 112 | * Allocate a new ACL with the specified number of entries. |
@@ -51,6 +121,7 @@ posix_acl_alloc(int count, gfp_t flags) | |||
51 | posix_acl_init(acl, count); | 121 | posix_acl_init(acl, count); |
52 | return acl; | 122 | return acl; |
53 | } | 123 | } |
124 | EXPORT_SYMBOL(posix_acl_alloc); | ||
54 | 125 | ||
55 | /* | 126 | /* |
56 | * Clone an ACL. | 127 | * Clone an ACL. |
@@ -146,6 +217,7 @@ posix_acl_valid(const struct posix_acl *acl) | |||
146 | return 0; | 217 | return 0; |
147 | return -EINVAL; | 218 | return -EINVAL; |
148 | } | 219 | } |
220 | EXPORT_SYMBOL(posix_acl_valid); | ||
149 | 221 | ||
150 | /* | 222 | /* |
151 | * Returns 0 if the acl can be exactly represented in the traditional | 223 | * Returns 0 if the acl can be exactly represented in the traditional |
@@ -186,6 +258,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) | |||
186 | *mode_p = (*mode_p & ~S_IRWXUGO) | mode; | 258 | *mode_p = (*mode_p & ~S_IRWXUGO) | mode; |
187 | return not_equiv; | 259 | return not_equiv; |
188 | } | 260 | } |
261 | EXPORT_SYMBOL(posix_acl_equiv_mode); | ||
189 | 262 | ||
190 | /* | 263 | /* |
191 | * Create an ACL representing the file mode permission bits of an inode. | 264 | * Create an ACL representing the file mode permission bits of an inode. |
@@ -207,6 +280,7 @@ posix_acl_from_mode(umode_t mode, gfp_t flags) | |||
207 | acl->a_entries[2].e_perm = (mode & S_IRWXO); | 280 | acl->a_entries[2].e_perm = (mode & S_IRWXO); |
208 | return acl; | 281 | return acl; |
209 | } | 282 | } |
283 | EXPORT_SYMBOL(posix_acl_from_mode); | ||
210 | 284 | ||
211 | /* | 285 | /* |
212 | * Return 0 if current is granted want access to the inode | 286 | * Return 0 if current is granted want access to the inode |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index a77d2b299199..24270eceddbf 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -26,7 +26,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
26 | unsigned long committed; | 26 | unsigned long committed; |
27 | struct vmalloc_info vmi; | 27 | struct vmalloc_info vmi; |
28 | long cached; | 28 | long cached; |
29 | long available; | ||
30 | unsigned long pagecache; | ||
31 | unsigned long wmark_low = 0; | ||
29 | unsigned long pages[NR_LRU_LISTS]; | 32 | unsigned long pages[NR_LRU_LISTS]; |
33 | struct zone *zone; | ||
30 | int lru; | 34 | int lru; |
31 | 35 | ||
32 | /* | 36 | /* |
@@ -47,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
47 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) | 51 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) |
48 | pages[lru] = global_page_state(NR_LRU_BASE + lru); | 52 | pages[lru] = global_page_state(NR_LRU_BASE + lru); |
49 | 53 | ||
54 | for_each_zone(zone) | ||
55 | wmark_low += zone->watermark[WMARK_LOW]; | ||
56 | |||
57 | /* | ||
58 | * Estimate the amount of memory available for userspace allocations, | ||
59 | * without causing swapping. | ||
60 | * | ||
61 | * Free memory cannot be taken below the low watermark, before the | ||
62 | * system starts swapping. | ||
63 | */ | ||
64 | available = i.freeram - wmark_low; | ||
65 | |||
66 | /* | ||
67 | * Not all the page cache can be freed, otherwise the system will | ||
68 | * start swapping. Assume at least half of the page cache, or the | ||
69 | * low watermark worth of cache, needs to stay. | ||
70 | */ | ||
71 | pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; | ||
72 | pagecache -= min(pagecache / 2, wmark_low); | ||
73 | available += pagecache; | ||
74 | |||
75 | /* | ||
76 | * Part of the reclaimable swap consists of items that are in use, | ||
77 | * and cannot be freed. Cap this estimate at the low watermark. | ||
78 | */ | ||
79 | available += global_page_state(NR_SLAB_RECLAIMABLE) - | ||
80 | min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); | ||
81 | |||
82 | if (available < 0) | ||
83 | available = 0; | ||
84 | |||
50 | /* | 85 | /* |
51 | * Tagged format, for easy grepping and expansion. | 86 | * Tagged format, for easy grepping and expansion. |
52 | */ | 87 | */ |
53 | seq_printf(m, | 88 | seq_printf(m, |
54 | "MemTotal: %8lu kB\n" | 89 | "MemTotal: %8lu kB\n" |
55 | "MemFree: %8lu kB\n" | 90 | "MemFree: %8lu kB\n" |
91 | "MemAvailable: %8lu kB\n" | ||
56 | "Buffers: %8lu kB\n" | 92 | "Buffers: %8lu kB\n" |
57 | "Cached: %8lu kB\n" | 93 | "Cached: %8lu kB\n" |
58 | "SwapCached: %8lu kB\n" | 94 | "SwapCached: %8lu kB\n" |
@@ -105,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
105 | , | 141 | , |
106 | K(i.totalram), | 142 | K(i.totalram), |
107 | K(i.freeram), | 143 | K(i.freeram), |
144 | K(available), | ||
108 | K(i.bufferram), | 145 | K(i.bufferram), |
109 | K(cached), | 146 | K(cached), |
110 | K(total_swapcache_pages()), | 147 | K(total_swapcache_pages()), |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b8e93a40a5d3..78c3c2097787 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi) | |||
443 | pstore_get_records(0); | 443 | pstore_get_records(0); |
444 | 444 | ||
445 | kmsg_dump_register(&pstore_dumper); | 445 | kmsg_dump_register(&pstore_dumper); |
446 | pstore_register_console(); | 446 | |
447 | pstore_register_ftrace(); | 447 | if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { |
448 | pstore_register_console(); | ||
449 | pstore_register_ftrace(); | ||
450 | } | ||
448 | 451 | ||
449 | if (pstore_update_ms >= 0) { | 452 | if (pstore_update_ms >= 0) { |
450 | pstore_timer.expires = jiffies + | 453 | pstore_timer.expires = jiffies + |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 39d14659a8d3..6a3e2c420180 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -275,4 +275,4 @@ int __init init_ramfs_fs(void) | |||
275 | 275 | ||
276 | return err; | 276 | return err; |
277 | } | 277 | } |
278 | module_init(init_ramfs_fs) | 278 | fs_initcall(init_ramfs_fs); |
diff --git a/fs/read_write.c b/fs/read_write.c index 58e440df1bc6..1193ffd03565 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -901,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
901 | io_fn_t fn; | 901 | io_fn_t fn; |
902 | iov_fn_t fnv; | 902 | iov_fn_t fnv; |
903 | 903 | ||
904 | ret = -EFAULT; | ||
905 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | ||
906 | goto out; | ||
907 | |||
908 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | 904 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, |
909 | UIO_FASTIOV, iovstack, &iov); | 905 | UIO_FASTIOV, iovstack, &iov); |
910 | if (ret <= 0) | 906 | if (ret <= 0) |
diff --git a/fs/splice.c b/fs/splice.c index 46a08f772d7d..12028fa41def 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = { | |||
555 | .get = generic_pipe_buf_get, | 555 | .get = generic_pipe_buf_get, |
556 | }; | 556 | }; |
557 | 557 | ||
558 | static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, | ||
559 | struct pipe_buffer *buf) | ||
560 | { | ||
561 | return 1; | ||
562 | } | ||
563 | |||
564 | /* Pipe buffer operations for a socket and similar. */ | ||
565 | const struct pipe_buf_operations nosteal_pipe_buf_ops = { | ||
566 | .can_merge = 0, | ||
567 | .map = generic_pipe_buf_map, | ||
568 | .unmap = generic_pipe_buf_unmap, | ||
569 | .confirm = generic_pipe_buf_confirm, | ||
570 | .release = generic_pipe_buf_release, | ||
571 | .steal = generic_pipe_buf_nosteal, | ||
572 | .get = generic_pipe_buf_get, | ||
573 | }; | ||
574 | EXPORT_SYMBOL(nosteal_pipe_buf_ops); | ||
575 | |||
558 | static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | 576 | static ssize_t kernel_readv(struct file *file, const struct iovec *vec, |
559 | unsigned long vlen, loff_t offset) | 577 | unsigned long vlen, loff_t offset) |
560 | { | 578 | { |
diff --git a/fs/super.c b/fs/super.c index e5f6c2cfac38..cecd780e0f44 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -166,6 +166,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
166 | if (!s) | 166 | if (!s) |
167 | return NULL; | 167 | return NULL; |
168 | 168 | ||
169 | INIT_LIST_HEAD(&s->s_mounts); | ||
170 | |||
169 | if (security_sb_alloc(s)) | 171 | if (security_sb_alloc(s)) |
170 | goto fail; | 172 | goto fail; |
171 | 173 | ||
@@ -188,7 +190,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
188 | if (list_lru_init(&s->s_inode_lru)) | 190 | if (list_lru_init(&s->s_inode_lru)) |
189 | goto fail; | 191 | goto fail; |
190 | 192 | ||
191 | INIT_LIST_HEAD(&s->s_mounts); | ||
192 | init_rwsem(&s->s_umount); | 193 | init_rwsem(&s->s_umount); |
193 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 194 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
194 | /* | 195 | /* |
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 8876ac183373..6eff6e1205a5 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile | |||
@@ -2,4 +2,4 @@ | |||
2 | # Makefile for the sysfs virtual filesystem | 2 | # Makefile for the sysfs virtual filesystem |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := inode.o file.o dir.o symlink.o mount.o group.o | 5 | obj-y := file.o dir.o symlink.o mount.o group.o |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 5e73d6626e50..ee0d761c3179 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -13,465 +13,31 @@ | |||
13 | #undef DEBUG | 13 | #undef DEBUG |
14 | 14 | ||
15 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
16 | #include <linux/mount.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/kobject.h> | 16 | #include <linux/kobject.h> |
19 | #include <linux/namei.h> | ||
20 | #include <linux/idr.h> | ||
21 | #include <linux/completion.h> | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
24 | #include <linux/security.h> | ||
25 | #include <linux/hash.h> | ||
26 | #include "sysfs.h" | 18 | #include "sysfs.h" |
27 | 19 | ||
28 | DEFINE_MUTEX(sysfs_mutex); | ||
29 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); | 20 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); |
30 | 21 | ||
31 | #define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) | ||
32 | |||
33 | static DEFINE_SPINLOCK(sysfs_ino_lock); | ||
34 | static DEFINE_IDA(sysfs_ino_ida); | ||
35 | |||
36 | /** | ||
37 | * sysfs_name_hash | ||
38 | * @name: Null terminated string to hash | ||
39 | * @ns: Namespace tag to hash | ||
40 | * | ||
41 | * Returns 31 bit hash of ns + name (so it fits in an off_t ) | ||
42 | */ | ||
43 | static unsigned int sysfs_name_hash(const char *name, const void *ns) | ||
44 | { | ||
45 | unsigned long hash = init_name_hash(); | ||
46 | unsigned int len = strlen(name); | ||
47 | while (len--) | ||
48 | hash = partial_name_hash(*name++, hash); | ||
49 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); | ||
50 | hash &= 0x7fffffffU; | ||
51 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ | ||
52 | if (hash < 1) | ||
53 | hash += 2; | ||
54 | if (hash >= INT_MAX) | ||
55 | hash = INT_MAX - 1; | ||
56 | return hash; | ||
57 | } | ||
58 | |||
59 | static int sysfs_name_compare(unsigned int hash, const char *name, | ||
60 | const void *ns, const struct sysfs_dirent *sd) | ||
61 | { | ||
62 | if (hash != sd->s_hash) | ||
63 | return hash - sd->s_hash; | ||
64 | if (ns != sd->s_ns) | ||
65 | return ns - sd->s_ns; | ||
66 | return strcmp(name, sd->s_name); | ||
67 | } | ||
68 | |||
69 | static int sysfs_sd_compare(const struct sysfs_dirent *left, | ||
70 | const struct sysfs_dirent *right) | ||
71 | { | ||
72 | return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, | ||
73 | right); | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * sysfs_link_sibling - link sysfs_dirent into sibling rbtree | ||
78 | * @sd: sysfs_dirent of interest | ||
79 | * | ||
80 | * Link @sd into its sibling rbtree which starts from | ||
81 | * sd->s_parent->s_dir.children. | ||
82 | * | ||
83 | * Locking: | ||
84 | * mutex_lock(sysfs_mutex) | ||
85 | * | ||
86 | * RETURNS: | ||
87 | * 0 on susccess -EEXIST on failure. | ||
88 | */ | ||
89 | static int sysfs_link_sibling(struct sysfs_dirent *sd) | ||
90 | { | ||
91 | struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; | ||
92 | struct rb_node *parent = NULL; | ||
93 | |||
94 | if (sysfs_type(sd) == SYSFS_DIR) | ||
95 | sd->s_parent->s_dir.subdirs++; | ||
96 | |||
97 | while (*node) { | ||
98 | struct sysfs_dirent *pos; | ||
99 | int result; | ||
100 | |||
101 | pos = to_sysfs_dirent(*node); | ||
102 | parent = *node; | ||
103 | result = sysfs_sd_compare(sd, pos); | ||
104 | if (result < 0) | ||
105 | node = &pos->s_rb.rb_left; | ||
106 | else if (result > 0) | ||
107 | node = &pos->s_rb.rb_right; | ||
108 | else | ||
109 | return -EEXIST; | ||
110 | } | ||
111 | /* add new node and rebalance the tree */ | ||
112 | rb_link_node(&sd->s_rb, parent, node); | ||
113 | rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | /** | ||
118 | * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree | ||
119 | * @sd: sysfs_dirent of interest | ||
120 | * | ||
121 | * Unlink @sd from its sibling rbtree which starts from | ||
122 | * sd->s_parent->s_dir.children. | ||
123 | * | ||
124 | * Locking: | ||
125 | * mutex_lock(sysfs_mutex) | ||
126 | */ | ||
127 | static void sysfs_unlink_sibling(struct sysfs_dirent *sd) | ||
128 | { | ||
129 | if (sysfs_type(sd) == SYSFS_DIR) | ||
130 | sd->s_parent->s_dir.subdirs--; | ||
131 | |||
132 | rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * sysfs_get_active - get an active reference to sysfs_dirent | ||
137 | * @sd: sysfs_dirent to get an active reference to | ||
138 | * | ||
139 | * Get an active reference of @sd. This function is noop if @sd | ||
140 | * is NULL. | ||
141 | * | ||
142 | * RETURNS: | ||
143 | * Pointer to @sd on success, NULL on failure. | ||
144 | */ | ||
145 | struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) | ||
146 | { | ||
147 | if (unlikely(!sd)) | ||
148 | return NULL; | ||
149 | |||
150 | if (!atomic_inc_unless_negative(&sd->s_active)) | ||
151 | return NULL; | ||
152 | |||
153 | if (likely(!sysfs_ignore_lockdep(sd))) | ||
154 | rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); | ||
155 | return sd; | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * sysfs_put_active - put an active reference to sysfs_dirent | ||
160 | * @sd: sysfs_dirent to put an active reference to | ||
161 | * | ||
162 | * Put an active reference to @sd. This function is noop if @sd | ||
163 | * is NULL. | ||
164 | */ | ||
165 | void sysfs_put_active(struct sysfs_dirent *sd) | ||
166 | { | ||
167 | int v; | ||
168 | |||
169 | if (unlikely(!sd)) | ||
170 | return; | ||
171 | |||
172 | if (likely(!sysfs_ignore_lockdep(sd))) | ||
173 | rwsem_release(&sd->dep_map, 1, _RET_IP_); | ||
174 | v = atomic_dec_return(&sd->s_active); | ||
175 | if (likely(v != SD_DEACTIVATED_BIAS)) | ||
176 | return; | ||
177 | |||
178 | /* atomic_dec_return() is a mb(), we'll always see the updated | ||
179 | * sd->u.completion. | ||
180 | */ | ||
181 | complete(sd->u.completion); | ||
182 | } | ||
183 | |||
184 | /** | ||
185 | * sysfs_deactivate - deactivate sysfs_dirent | ||
186 | * @sd: sysfs_dirent to deactivate | ||
187 | * | ||
188 | * Deny new active references and drain existing ones. | ||
189 | */ | ||
190 | static void sysfs_deactivate(struct sysfs_dirent *sd) | ||
191 | { | ||
192 | DECLARE_COMPLETION_ONSTACK(wait); | ||
193 | int v; | ||
194 | |||
195 | BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); | ||
196 | |||
197 | if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) | ||
198 | return; | ||
199 | |||
200 | sd->u.completion = (void *)&wait; | ||
201 | |||
202 | rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); | ||
203 | /* atomic_add_return() is a mb(), put_active() will always see | ||
204 | * the updated sd->u.completion. | ||
205 | */ | ||
206 | v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); | ||
207 | |||
208 | if (v != SD_DEACTIVATED_BIAS) { | ||
209 | lock_contended(&sd->dep_map, _RET_IP_); | ||
210 | wait_for_completion(&wait); | ||
211 | } | ||
212 | |||
213 | lock_acquired(&sd->dep_map, _RET_IP_); | ||
214 | rwsem_release(&sd->dep_map, 1, _RET_IP_); | ||
215 | } | ||
216 | |||
217 | static int sysfs_alloc_ino(unsigned int *pino) | ||
218 | { | ||
219 | int ino, rc; | ||
220 | |||
221 | retry: | ||
222 | spin_lock(&sysfs_ino_lock); | ||
223 | rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); | ||
224 | spin_unlock(&sysfs_ino_lock); | ||
225 | |||
226 | if (rc == -EAGAIN) { | ||
227 | if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) | ||
228 | goto retry; | ||
229 | rc = -ENOMEM; | ||
230 | } | ||
231 | |||
232 | *pino = ino; | ||
233 | return rc; | ||
234 | } | ||
235 | |||
236 | static void sysfs_free_ino(unsigned int ino) | ||
237 | { | ||
238 | spin_lock(&sysfs_ino_lock); | ||
239 | ida_remove(&sysfs_ino_ida, ino); | ||
240 | spin_unlock(&sysfs_ino_lock); | ||
241 | } | ||
242 | |||
243 | void release_sysfs_dirent(struct sysfs_dirent *sd) | ||
244 | { | ||
245 | struct sysfs_dirent *parent_sd; | ||
246 | |||
247 | repeat: | ||
248 | /* Moving/renaming is always done while holding reference. | ||
249 | * sd->s_parent won't change beneath us. | ||
250 | */ | ||
251 | parent_sd = sd->s_parent; | ||
252 | |||
253 | WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED), | ||
254 | "sysfs: free using entry: %s/%s\n", | ||
255 | parent_sd ? parent_sd->s_name : "", sd->s_name); | ||
256 | |||
257 | if (sysfs_type(sd) == SYSFS_KOBJ_LINK) | ||
258 | sysfs_put(sd->s_symlink.target_sd); | ||
259 | if (sysfs_type(sd) & SYSFS_COPY_NAME) | ||
260 | kfree(sd->s_name); | ||
261 | if (sd->s_iattr && sd->s_iattr->ia_secdata) | ||
262 | security_release_secctx(sd->s_iattr->ia_secdata, | ||
263 | sd->s_iattr->ia_secdata_len); | ||
264 | kfree(sd->s_iattr); | ||
265 | sysfs_free_ino(sd->s_ino); | ||
266 | kmem_cache_free(sysfs_dir_cachep, sd); | ||
267 | |||
268 | sd = parent_sd; | ||
269 | if (sd && atomic_dec_and_test(&sd->s_count)) | ||
270 | goto repeat; | ||
271 | } | ||
272 | |||
273 | static int sysfs_dentry_delete(const struct dentry *dentry) | ||
274 | { | ||
275 | struct sysfs_dirent *sd = dentry->d_fsdata; | ||
276 | return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); | ||
277 | } | ||
278 | |||
279 | static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) | ||
280 | { | ||
281 | struct sysfs_dirent *sd; | ||
282 | int type; | ||
283 | |||
284 | if (flags & LOOKUP_RCU) | ||
285 | return -ECHILD; | ||
286 | |||
287 | sd = dentry->d_fsdata; | ||
288 | mutex_lock(&sysfs_mutex); | ||
289 | |||
290 | /* The sysfs dirent has been deleted */ | ||
291 | if (sd->s_flags & SYSFS_FLAG_REMOVED) | ||
292 | goto out_bad; | ||
293 | |||
294 | /* The sysfs dirent has been moved? */ | ||
295 | if (dentry->d_parent->d_fsdata != sd->s_parent) | ||
296 | goto out_bad; | ||
297 | |||
298 | /* The sysfs dirent has been renamed */ | ||
299 | if (strcmp(dentry->d_name.name, sd->s_name) != 0) | ||
300 | goto out_bad; | ||
301 | |||
302 | /* The sysfs dirent has been moved to a different namespace */ | ||
303 | type = KOBJ_NS_TYPE_NONE; | ||
304 | if (sd->s_parent) { | ||
305 | type = sysfs_ns_type(sd->s_parent); | ||
306 | if (type != KOBJ_NS_TYPE_NONE && | ||
307 | sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns) | ||
308 | goto out_bad; | ||
309 | } | ||
310 | |||
311 | mutex_unlock(&sysfs_mutex); | ||
312 | out_valid: | ||
313 | return 1; | ||
314 | out_bad: | ||
315 | /* Remove the dentry from the dcache hashes. | ||
316 | * If this is a deleted dentry we use d_drop instead of d_delete | ||
317 | * so sysfs doesn't need to cope with negative dentries. | ||
318 | * | ||
319 | * If this is a dentry that has simply been renamed we | ||
320 | * use d_drop to remove it from the dcache lookup on its | ||
321 | * old parent. If this dentry persists later when a lookup | ||
322 | * is performed at its new name the dentry will be readded | ||
323 | * to the dcache hashes. | ||
324 | */ | ||
325 | mutex_unlock(&sysfs_mutex); | ||
326 | |||
327 | /* If we have submounts we must allow the vfs caches | ||
328 | * to lie about the state of the filesystem to prevent | ||
329 | * leaks and other nasty things. | ||
330 | */ | ||
331 | if (check_submounts_and_drop(dentry) != 0) | ||
332 | goto out_valid; | ||
333 | |||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | static void sysfs_dentry_release(struct dentry *dentry) | ||
338 | { | ||
339 | sysfs_put(dentry->d_fsdata); | ||
340 | } | ||
341 | |||
342 | const struct dentry_operations sysfs_dentry_ops = { | ||
343 | .d_revalidate = sysfs_dentry_revalidate, | ||
344 | .d_delete = sysfs_dentry_delete, | ||
345 | .d_release = sysfs_dentry_release, | ||
346 | }; | ||
347 | |||
348 | struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) | ||
349 | { | ||
350 | char *dup_name = NULL; | ||
351 | struct sysfs_dirent *sd; | ||
352 | |||
353 | if (type & SYSFS_COPY_NAME) { | ||
354 | name = dup_name = kstrdup(name, GFP_KERNEL); | ||
355 | if (!name) | ||
356 | return NULL; | ||
357 | } | ||
358 | |||
359 | sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); | ||
360 | if (!sd) | ||
361 | goto err_out1; | ||
362 | |||
363 | if (sysfs_alloc_ino(&sd->s_ino)) | ||
364 | goto err_out2; | ||
365 | |||
366 | atomic_set(&sd->s_count, 1); | ||
367 | atomic_set(&sd->s_active, 0); | ||
368 | |||
369 | sd->s_name = name; | ||
370 | sd->s_mode = mode; | ||
371 | sd->s_flags = type | SYSFS_FLAG_REMOVED; | ||
372 | |||
373 | return sd; | ||
374 | |||
375 | err_out2: | ||
376 | kmem_cache_free(sysfs_dir_cachep, sd); | ||
377 | err_out1: | ||
378 | kfree(dup_name); | ||
379 | return NULL; | ||
380 | } | ||
381 | |||
382 | /** | ||
383 | * sysfs_addrm_start - prepare for sysfs_dirent add/remove | ||
384 | * @acxt: pointer to sysfs_addrm_cxt to be used | ||
385 | * | ||
386 | * This function is called when the caller is about to add or remove | ||
387 | * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used | ||
388 | * to keep and pass context to other addrm functions. | ||
389 | * | ||
390 | * LOCKING: | ||
391 | * Kernel thread context (may sleep). sysfs_mutex is locked on | ||
392 | * return. | ||
393 | */ | ||
394 | void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) | ||
395 | __acquires(sysfs_mutex) | ||
396 | { | ||
397 | memset(acxt, 0, sizeof(*acxt)); | ||
398 | |||
399 | mutex_lock(&sysfs_mutex); | ||
400 | } | ||
401 | |||
402 | /** | ||
403 | * __sysfs_add_one - add sysfs_dirent to parent without warning | ||
404 | * @acxt: addrm context to use | ||
405 | * @sd: sysfs_dirent to be added | ||
406 | * @parent_sd: the parent sysfs_dirent to add @sd to | ||
407 | * | ||
408 | * Get @parent_sd and set @sd->s_parent to it and increment nlink of | ||
409 | * the parent inode if @sd is a directory and link into the children | ||
410 | * list of the parent. | ||
411 | * | ||
412 | * This function should be called between calls to | ||
413 | * sysfs_addrm_start() and sysfs_addrm_finish() and should be | ||
414 | * passed the same @acxt as passed to sysfs_addrm_start(). | ||
415 | * | ||
416 | * LOCKING: | ||
417 | * Determined by sysfs_addrm_start(). | ||
418 | * | ||
419 | * RETURNS: | ||
420 | * 0 on success, -EEXIST if entry with the given name already | ||
421 | * exists. | ||
422 | */ | ||
423 | int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, | ||
424 | struct sysfs_dirent *parent_sd) | ||
425 | { | ||
426 | struct sysfs_inode_attrs *ps_iattr; | ||
427 | int ret; | ||
428 | |||
429 | if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { | ||
430 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", | ||
431 | sysfs_ns_type(parent_sd) ? "required" : "invalid", | ||
432 | parent_sd->s_name, sd->s_name); | ||
433 | return -EINVAL; | ||
434 | } | ||
435 | |||
436 | sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); | ||
437 | sd->s_parent = sysfs_get(parent_sd); | ||
438 | |||
439 | ret = sysfs_link_sibling(sd); | ||
440 | if (ret) | ||
441 | return ret; | ||
442 | |||
443 | /* Update timestamps on the parent */ | ||
444 | ps_iattr = parent_sd->s_iattr; | ||
445 | if (ps_iattr) { | ||
446 | struct iattr *ps_iattrs = &ps_iattr->ia_iattr; | ||
447 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | ||
448 | } | ||
449 | |||
450 | /* Mark the entry added into directory tree */ | ||
451 | sd->s_flags &= ~SYSFS_FLAG_REMOVED; | ||
452 | |||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | /** | 22 | /** |
457 | * sysfs_pathname - return full path to sysfs dirent | 23 | * sysfs_pathname - return full path to sysfs dirent |
458 | * @sd: sysfs_dirent whose path we want | 24 | * @kn: kernfs_node whose path we want |
459 | * @path: caller allocated buffer of size PATH_MAX | 25 | * @path: caller allocated buffer of size PATH_MAX |
460 | * | 26 | * |
461 | * Gives the name "/" to the sysfs_root entry; any path returned | 27 | * Gives the name "/" to the sysfs_root entry; any path returned |
462 | * is relative to wherever sysfs is mounted. | 28 | * is relative to wherever sysfs is mounted. |
463 | */ | 29 | */ |
464 | static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) | 30 | static char *sysfs_pathname(struct kernfs_node *kn, char *path) |
465 | { | 31 | { |
466 | if (sd->s_parent) { | 32 | if (kn->parent) { |
467 | sysfs_pathname(sd->s_parent, path); | 33 | sysfs_pathname(kn->parent, path); |
468 | strlcat(path, "/", PATH_MAX); | 34 | strlcat(path, "/", PATH_MAX); |
469 | } | 35 | } |
470 | strlcat(path, sd->s_name, PATH_MAX); | 36 | strlcat(path, kn->name, PATH_MAX); |
471 | return path; | 37 | return path; |
472 | } | 38 | } |
473 | 39 | ||
474 | void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) | 40 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name) |
475 | { | 41 | { |
476 | char *path; | 42 | char *path; |
477 | 43 | ||
@@ -489,445 +55,34 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) | |||
489 | } | 55 | } |
490 | 56 | ||
491 | /** | 57 | /** |
492 | * sysfs_add_one - add sysfs_dirent to parent | ||
493 | * @acxt: addrm context to use | ||
494 | * @sd: sysfs_dirent to be added | ||
495 | * @parent_sd: the parent sysfs_dirent to add @sd to | ||
496 | * | ||
497 | * Get @parent_sd and set @sd->s_parent to it and increment nlink of | ||
498 | * the parent inode if @sd is a directory and link into the children | ||
499 | * list of the parent. | ||
500 | * | ||
501 | * This function should be called between calls to | ||
502 | * sysfs_addrm_start() and sysfs_addrm_finish() and should be | ||
503 | * passed the same @acxt as passed to sysfs_addrm_start(). | ||
504 | * | ||
505 | * LOCKING: | ||
506 | * Determined by sysfs_addrm_start(). | ||
507 | * | ||
508 | * RETURNS: | ||
509 | * 0 on success, -EEXIST if entry with the given name already | ||
510 | * exists. | ||
511 | */ | ||
512 | int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, | ||
513 | struct sysfs_dirent *parent_sd) | ||
514 | { | ||
515 | int ret; | ||
516 | |||
517 | ret = __sysfs_add_one(acxt, sd, parent_sd); | ||
518 | |||
519 | if (ret == -EEXIST) | ||
520 | sysfs_warn_dup(parent_sd, sd->s_name); | ||
521 | return ret; | ||
522 | } | ||
523 | |||
524 | /** | ||
525 | * sysfs_remove_one - remove sysfs_dirent from parent | ||
526 | * @acxt: addrm context to use | ||
527 | * @sd: sysfs_dirent to be removed | ||
528 | * | ||
529 | * Mark @sd removed and drop nlink of parent inode if @sd is a | ||
530 | * directory. @sd is unlinked from the children list. | ||
531 | * | ||
532 | * This function should be called between calls to | ||
533 | * sysfs_addrm_start() and sysfs_addrm_finish() and should be | ||
534 | * passed the same @acxt as passed to sysfs_addrm_start(). | ||
535 | * | ||
536 | * LOCKING: | ||
537 | * Determined by sysfs_addrm_start(). | ||
538 | */ | ||
539 | static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, | ||
540 | struct sysfs_dirent *sd) | ||
541 | { | ||
542 | struct sysfs_inode_attrs *ps_iattr; | ||
543 | |||
544 | /* | ||
545 | * Removal can be called multiple times on the same node. Only the | ||
546 | * first invocation is effective and puts the base ref. | ||
547 | */ | ||
548 | if (sd->s_flags & SYSFS_FLAG_REMOVED) | ||
549 | return; | ||
550 | |||
551 | sysfs_unlink_sibling(sd); | ||
552 | |||
553 | /* Update timestamps on the parent */ | ||
554 | ps_iattr = sd->s_parent->s_iattr; | ||
555 | if (ps_iattr) { | ||
556 | struct iattr *ps_iattrs = &ps_iattr->ia_iattr; | ||
557 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | ||
558 | } | ||
559 | |||
560 | sd->s_flags |= SYSFS_FLAG_REMOVED; | ||
561 | sd->u.removed_list = acxt->removed; | ||
562 | acxt->removed = sd; | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * sysfs_addrm_finish - finish up sysfs_dirent add/remove | ||
567 | * @acxt: addrm context to finish up | ||
568 | * | ||
569 | * Finish up sysfs_dirent add/remove. Resources acquired by | ||
570 | * sysfs_addrm_start() are released and removed sysfs_dirents are | ||
571 | * cleaned up. | ||
572 | * | ||
573 | * LOCKING: | ||
574 | * sysfs_mutex is released. | ||
575 | */ | ||
576 | void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) | ||
577 | __releases(sysfs_mutex) | ||
578 | { | ||
579 | /* release resources acquired by sysfs_addrm_start() */ | ||
580 | mutex_unlock(&sysfs_mutex); | ||
581 | |||
582 | /* kill removed sysfs_dirents */ | ||
583 | while (acxt->removed) { | ||
584 | struct sysfs_dirent *sd = acxt->removed; | ||
585 | |||
586 | acxt->removed = sd->u.removed_list; | ||
587 | |||
588 | sysfs_deactivate(sd); | ||
589 | sysfs_unmap_bin_file(sd); | ||
590 | sysfs_put(sd); | ||
591 | } | ||
592 | } | ||
593 | |||
594 | /** | ||
595 | * sysfs_find_dirent - find sysfs_dirent with the given name | ||
596 | * @parent_sd: sysfs_dirent to search under | ||
597 | * @name: name to look for | ||
598 | * @ns: the namespace tag to use | ||
599 | * | ||
600 | * Look for sysfs_dirent with name @name under @parent_sd. | ||
601 | * | ||
602 | * LOCKING: | ||
603 | * mutex_lock(sysfs_mutex) | ||
604 | * | ||
605 | * RETURNS: | ||
606 | * Pointer to sysfs_dirent if found, NULL if not. | ||
607 | */ | ||
608 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | ||
609 | const unsigned char *name, | ||
610 | const void *ns) | ||
611 | { | ||
612 | struct rb_node *node = parent_sd->s_dir.children.rb_node; | ||
613 | unsigned int hash; | ||
614 | |||
615 | if (!!sysfs_ns_type(parent_sd) != !!ns) { | ||
616 | WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", | ||
617 | sysfs_ns_type(parent_sd) ? "required" : "invalid", | ||
618 | parent_sd->s_name, name); | ||
619 | return NULL; | ||
620 | } | ||
621 | |||
622 | hash = sysfs_name_hash(name, ns); | ||
623 | while (node) { | ||
624 | struct sysfs_dirent *sd; | ||
625 | int result; | ||
626 | |||
627 | sd = to_sysfs_dirent(node); | ||
628 | result = sysfs_name_compare(hash, name, ns, sd); | ||
629 | if (result < 0) | ||
630 | node = node->rb_left; | ||
631 | else if (result > 0) | ||
632 | node = node->rb_right; | ||
633 | else | ||
634 | return sd; | ||
635 | } | ||
636 | return NULL; | ||
637 | } | ||
638 | |||
639 | /** | ||
640 | * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name | ||
641 | * @parent_sd: sysfs_dirent to search under | ||
642 | * @name: name to look for | ||
643 | * @ns: the namespace tag to use | ||
644 | * | ||
645 | * Look for sysfs_dirent with name @name under @parent_sd and get | ||
646 | * it if found. | ||
647 | * | ||
648 | * LOCKING: | ||
649 | * Kernel thread context (may sleep). Grabs sysfs_mutex. | ||
650 | * | ||
651 | * RETURNS: | ||
652 | * Pointer to sysfs_dirent if found, NULL if not. | ||
653 | */ | ||
654 | struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, | ||
655 | const unsigned char *name, | ||
656 | const void *ns) | ||
657 | { | ||
658 | struct sysfs_dirent *sd; | ||
659 | |||
660 | mutex_lock(&sysfs_mutex); | ||
661 | sd = sysfs_find_dirent(parent_sd, name, ns); | ||
662 | sysfs_get(sd); | ||
663 | mutex_unlock(&sysfs_mutex); | ||
664 | |||
665 | return sd; | ||
666 | } | ||
667 | EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); | ||
668 | |||
669 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | ||
670 | enum kobj_ns_type type, | ||
671 | const char *name, const void *ns, | ||
672 | struct sysfs_dirent **p_sd) | ||
673 | { | ||
674 | umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
675 | struct sysfs_addrm_cxt acxt; | ||
676 | struct sysfs_dirent *sd; | ||
677 | int rc; | ||
678 | |||
679 | /* allocate */ | ||
680 | sd = sysfs_new_dirent(name, mode, SYSFS_DIR); | ||
681 | if (!sd) | ||
682 | return -ENOMEM; | ||
683 | |||
684 | sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); | ||
685 | sd->s_ns = ns; | ||
686 | sd->s_dir.kobj = kobj; | ||
687 | |||
688 | /* link in */ | ||
689 | sysfs_addrm_start(&acxt); | ||
690 | rc = sysfs_add_one(&acxt, sd, parent_sd); | ||
691 | sysfs_addrm_finish(&acxt); | ||
692 | |||
693 | if (rc == 0) | ||
694 | *p_sd = sd; | ||
695 | else | ||
696 | sysfs_put(sd); | ||
697 | |||
698 | return rc; | ||
699 | } | ||
700 | |||
701 | int sysfs_create_subdir(struct kobject *kobj, const char *name, | ||
702 | struct sysfs_dirent **p_sd) | ||
703 | { | ||
704 | return create_dir(kobj, kobj->sd, | ||
705 | KOBJ_NS_TYPE_NONE, name, NULL, p_sd); | ||
706 | } | ||
707 | |||
708 | /** | ||
709 | * sysfs_read_ns_type: return associated ns_type | ||
710 | * @kobj: the kobject being queried | ||
711 | * | ||
712 | * Each kobject can be tagged with exactly one namespace type | ||
713 | * (i.e. network or user). Return the ns_type associated with | ||
714 | * this object if any | ||
715 | */ | ||
716 | static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) | ||
717 | { | ||
718 | const struct kobj_ns_type_operations *ops; | ||
719 | enum kobj_ns_type type; | ||
720 | |||
721 | ops = kobj_child_ns_ops(kobj); | ||
722 | if (!ops) | ||
723 | return KOBJ_NS_TYPE_NONE; | ||
724 | |||
725 | type = ops->type; | ||
726 | BUG_ON(type <= KOBJ_NS_TYPE_NONE); | ||
727 | BUG_ON(type >= KOBJ_NS_TYPES); | ||
728 | BUG_ON(!kobj_ns_type_registered(type)); | ||
729 | |||
730 | return type; | ||
731 | } | ||
732 | |||
733 | /** | ||
734 | * sysfs_create_dir_ns - create a directory for an object with a namespace tag | 58 | * sysfs_create_dir_ns - create a directory for an object with a namespace tag |
735 | * @kobj: object we're creating directory for | 59 | * @kobj: object we're creating directory for |
736 | * @ns: the namespace tag to use | 60 | * @ns: the namespace tag to use |
737 | */ | 61 | */ |
738 | int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) | 62 | int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) |
739 | { | 63 | { |
740 | enum kobj_ns_type type; | 64 | struct kernfs_node *parent, *kn; |
741 | struct sysfs_dirent *parent_sd, *sd; | ||
742 | int error = 0; | ||
743 | 65 | ||
744 | BUG_ON(!kobj); | 66 | BUG_ON(!kobj); |
745 | 67 | ||
746 | if (kobj->parent) | 68 | if (kobj->parent) |
747 | parent_sd = kobj->parent->sd; | 69 | parent = kobj->parent->sd; |
748 | else | 70 | else |
749 | parent_sd = &sysfs_root; | 71 | parent = sysfs_root_kn; |
750 | 72 | ||
751 | if (!parent_sd) | 73 | if (!parent) |
752 | return -ENOENT; | 74 | return -ENOENT; |
753 | 75 | ||
754 | type = sysfs_read_ns_type(kobj); | 76 | kn = kernfs_create_dir_ns(parent, kobject_name(kobj), |
755 | 77 | S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns); | |
756 | error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); | 78 | if (IS_ERR(kn)) { |
757 | if (!error) | 79 | if (PTR_ERR(kn) == -EEXIST) |
758 | kobj->sd = sd; | 80 | sysfs_warn_dup(parent, kobject_name(kobj)); |
759 | return error; | 81 | return PTR_ERR(kn); |
760 | } | ||
761 | |||
762 | static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, | ||
763 | unsigned int flags) | ||
764 | { | ||
765 | struct dentry *ret = NULL; | ||
766 | struct dentry *parent = dentry->d_parent; | ||
767 | struct sysfs_dirent *parent_sd = parent->d_fsdata; | ||
768 | struct sysfs_dirent *sd; | ||
769 | struct inode *inode; | ||
770 | enum kobj_ns_type type; | ||
771 | const void *ns; | ||
772 | |||
773 | mutex_lock(&sysfs_mutex); | ||
774 | |||
775 | type = sysfs_ns_type(parent_sd); | ||
776 | ns = sysfs_info(dir->i_sb)->ns[type]; | ||
777 | |||
778 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); | ||
779 | |||
780 | /* no such entry */ | ||
781 | if (!sd) { | ||
782 | ret = ERR_PTR(-ENOENT); | ||
783 | goto out_unlock; | ||
784 | } | ||
785 | dentry->d_fsdata = sysfs_get(sd); | ||
786 | |||
787 | /* attach dentry and inode */ | ||
788 | inode = sysfs_get_inode(dir->i_sb, sd); | ||
789 | if (!inode) { | ||
790 | ret = ERR_PTR(-ENOMEM); | ||
791 | goto out_unlock; | ||
792 | } | ||
793 | |||
794 | /* instantiate and hash dentry */ | ||
795 | ret = d_materialise_unique(dentry, inode); | ||
796 | out_unlock: | ||
797 | mutex_unlock(&sysfs_mutex); | ||
798 | return ret; | ||
799 | } | ||
800 | |||
801 | const struct inode_operations sysfs_dir_inode_operations = { | ||
802 | .lookup = sysfs_lookup, | ||
803 | .permission = sysfs_permission, | ||
804 | .setattr = sysfs_setattr, | ||
805 | .getattr = sysfs_getattr, | ||
806 | .setxattr = sysfs_setxattr, | ||
807 | }; | ||
808 | |||
809 | static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) | ||
810 | { | ||
811 | struct sysfs_dirent *last; | ||
812 | |||
813 | while (true) { | ||
814 | struct rb_node *rbn; | ||
815 | |||
816 | last = pos; | ||
817 | |||
818 | if (sysfs_type(pos) != SYSFS_DIR) | ||
819 | break; | ||
820 | |||
821 | rbn = rb_first(&pos->s_dir.children); | ||
822 | if (!rbn) | ||
823 | break; | ||
824 | |||
825 | pos = to_sysfs_dirent(rbn); | ||
826 | } | ||
827 | |||
828 | return last; | ||
829 | } | ||
830 | |||
831 | /** | ||
832 | * sysfs_next_descendant_post - find the next descendant for post-order walk | ||
833 | * @pos: the current position (%NULL to initiate traversal) | ||
834 | * @root: sysfs_dirent whose descendants to walk | ||
835 | * | ||
836 | * Find the next descendant to visit for post-order traversal of @root's | ||
837 | * descendants. @root is included in the iteration and the last node to be | ||
838 | * visited. | ||
839 | */ | ||
840 | static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, | ||
841 | struct sysfs_dirent *root) | ||
842 | { | ||
843 | struct rb_node *rbn; | ||
844 | |||
845 | lockdep_assert_held(&sysfs_mutex); | ||
846 | |||
847 | /* if first iteration, visit leftmost descendant which may be root */ | ||
848 | if (!pos) | ||
849 | return sysfs_leftmost_descendant(root); | ||
850 | |||
851 | /* if we visited @root, we're done */ | ||
852 | if (pos == root) | ||
853 | return NULL; | ||
854 | |||
855 | /* if there's an unvisited sibling, visit its leftmost descendant */ | ||
856 | rbn = rb_next(&pos->s_rb); | ||
857 | if (rbn) | ||
858 | return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); | ||
859 | |||
860 | /* no sibling left, visit parent */ | ||
861 | return pos->s_parent; | ||
862 | } | ||
863 | |||
864 | static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, | ||
865 | struct sysfs_dirent *sd) | ||
866 | { | ||
867 | struct sysfs_dirent *pos, *next; | ||
868 | |||
869 | if (!sd) | ||
870 | return; | ||
871 | |||
872 | pr_debug("sysfs %s: removing\n", sd->s_name); | ||
873 | |||
874 | next = NULL; | ||
875 | do { | ||
876 | pos = next; | ||
877 | next = sysfs_next_descendant_post(pos, sd); | ||
878 | if (pos) | ||
879 | sysfs_remove_one(acxt, pos); | ||
880 | } while (next); | ||
881 | } | ||
882 | |||
883 | /** | ||
884 | * sysfs_remove - remove a sysfs_dirent recursively | ||
885 | * @sd: the sysfs_dirent to remove | ||
886 | * | ||
887 | * Remove @sd along with all its subdirectories and files. | ||
888 | */ | ||
889 | void sysfs_remove(struct sysfs_dirent *sd) | ||
890 | { | ||
891 | struct sysfs_addrm_cxt acxt; | ||
892 | |||
893 | sysfs_addrm_start(&acxt); | ||
894 | __sysfs_remove(&acxt, sd); | ||
895 | sysfs_addrm_finish(&acxt); | ||
896 | } | ||
897 | |||
898 | /** | ||
899 | * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it | ||
900 | * @dir_sd: parent of the target | ||
901 | * @name: name of the sysfs_dirent to remove | ||
902 | * @ns: namespace tag of the sysfs_dirent to remove | ||
903 | * | ||
904 | * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove | ||
905 | * it. Returns 0 on success, -ENOENT if such entry doesn't exist. | ||
906 | */ | ||
907 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, | ||
908 | const void *ns) | ||
909 | { | ||
910 | struct sysfs_addrm_cxt acxt; | ||
911 | struct sysfs_dirent *sd; | ||
912 | |||
913 | if (!dir_sd) { | ||
914 | WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", | ||
915 | name); | ||
916 | return -ENOENT; | ||
917 | } | 82 | } |
918 | 83 | ||
919 | sysfs_addrm_start(&acxt); | 84 | kobj->sd = kn; |
920 | 85 | return 0; | |
921 | sd = sysfs_find_dirent(dir_sd, name, ns); | ||
922 | if (sd) | ||
923 | __sysfs_remove(&acxt, sd); | ||
924 | |||
925 | sysfs_addrm_finish(&acxt); | ||
926 | |||
927 | if (sd) | ||
928 | return 0; | ||
929 | else | ||
930 | return -ENOENT; | ||
931 | } | 86 | } |
932 | 87 | ||
933 | /** | 88 | /** |
@@ -940,207 +95,47 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, | |||
940 | */ | 95 | */ |
941 | void sysfs_remove_dir(struct kobject *kobj) | 96 | void sysfs_remove_dir(struct kobject *kobj) |
942 | { | 97 | { |
943 | struct sysfs_dirent *sd = kobj->sd; | 98 | struct kernfs_node *kn = kobj->sd; |
944 | 99 | ||
945 | /* | 100 | /* |
946 | * In general, kboject owner is responsible for ensuring removal | 101 | * In general, kboject owner is responsible for ensuring removal |
947 | * doesn't race with other operations and sysfs doesn't provide any | 102 | * doesn't race with other operations and sysfs doesn't provide any |
948 | * protection; however, when @kobj is used as a symlink target, the | 103 | * protection; however, when @kobj is used as a symlink target, the |
949 | * symlinking entity usually doesn't own @kobj and thus has no | 104 | * symlinking entity usually doesn't own @kobj and thus has no |
950 | * control over removal. @kobj->sd may be removed anytime and | 105 | * control over removal. @kobj->sd may be removed anytime |
951 | * symlink code may end up dereferencing an already freed sd. | 106 | * and symlink code may end up dereferencing an already freed node. |
952 | * | 107 | * |
953 | * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation | 108 | * sysfs_symlink_target_lock synchronizes @kobj->sd |
954 | * against symlink operations so that symlink code can safely | 109 | * disassociation against symlink operations so that symlink code |
955 | * dereference @kobj->sd. | 110 | * can safely dereference @kobj->sd. |
956 | */ | 111 | */ |
957 | spin_lock(&sysfs_symlink_target_lock); | 112 | spin_lock(&sysfs_symlink_target_lock); |
958 | kobj->sd = NULL; | 113 | kobj->sd = NULL; |
959 | spin_unlock(&sysfs_symlink_target_lock); | 114 | spin_unlock(&sysfs_symlink_target_lock); |
960 | 115 | ||
961 | if (sd) { | 116 | if (kn) { |
962 | WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); | 117 | WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); |
963 | sysfs_remove(sd); | 118 | kernfs_remove(kn); |
964 | } | 119 | } |
965 | } | 120 | } |
966 | 121 | ||
967 | int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, | ||
968 | const char *new_name, const void *new_ns) | ||
969 | { | ||
970 | int error; | ||
971 | |||
972 | mutex_lock(&sysfs_mutex); | ||
973 | |||
974 | error = 0; | ||
975 | if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && | ||
976 | (strcmp(sd->s_name, new_name) == 0)) | ||
977 | goto out; /* nothing to rename */ | ||
978 | |||
979 | error = -EEXIST; | ||
980 | if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) | ||
981 | goto out; | ||
982 | |||
983 | /* rename sysfs_dirent */ | ||
984 | if (strcmp(sd->s_name, new_name) != 0) { | ||
985 | error = -ENOMEM; | ||
986 | new_name = kstrdup(new_name, GFP_KERNEL); | ||
987 | if (!new_name) | ||
988 | goto out; | ||
989 | |||
990 | kfree(sd->s_name); | ||
991 | sd->s_name = new_name; | ||
992 | } | ||
993 | |||
994 | /* | ||
995 | * Move to the appropriate place in the appropriate directories rbtree. | ||
996 | */ | ||
997 | sysfs_unlink_sibling(sd); | ||
998 | sysfs_get(new_parent_sd); | ||
999 | sysfs_put(sd->s_parent); | ||
1000 | sd->s_ns = new_ns; | ||
1001 | sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); | ||
1002 | sd->s_parent = new_parent_sd; | ||
1003 | sysfs_link_sibling(sd); | ||
1004 | |||
1005 | error = 0; | ||
1006 | out: | ||
1007 | mutex_unlock(&sysfs_mutex); | ||
1008 | return error; | ||
1009 | } | ||
1010 | |||
1011 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, | 122 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, |
1012 | const void *new_ns) | 123 | const void *new_ns) |
1013 | { | 124 | { |
1014 | struct sysfs_dirent *parent_sd = kobj->sd->s_parent; | 125 | struct kernfs_node *parent = kobj->sd->parent; |
1015 | 126 | ||
1016 | return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); | 127 | return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); |
1017 | } | 128 | } |
1018 | 129 | ||
1019 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, | 130 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, |
1020 | const void *new_ns) | 131 | const void *new_ns) |
1021 | { | 132 | { |
1022 | struct sysfs_dirent *sd = kobj->sd; | 133 | struct kernfs_node *kn = kobj->sd; |
1023 | struct sysfs_dirent *new_parent_sd; | 134 | struct kernfs_node *new_parent; |
1024 | 135 | ||
1025 | BUG_ON(!sd->s_parent); | 136 | BUG_ON(!kn->parent); |
1026 | new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? | 137 | new_parent = new_parent_kobj && new_parent_kobj->sd ? |
1027 | new_parent_kobj->sd : &sysfs_root; | 138 | new_parent_kobj->sd : sysfs_root_kn; |
1028 | 139 | ||
1029 | return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); | 140 | return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); |
1030 | } | 141 | } |
1031 | |||
1032 | /* Relationship between s_mode and the DT_xxx types */ | ||
1033 | static inline unsigned char dt_type(struct sysfs_dirent *sd) | ||
1034 | { | ||
1035 | return (sd->s_mode >> 12) & 15; | ||
1036 | } | ||
1037 | |||
1038 | static int sysfs_dir_release(struct inode *inode, struct file *filp) | ||
1039 | { | ||
1040 | sysfs_put(filp->private_data); | ||
1041 | return 0; | ||
1042 | } | ||
1043 | |||
1044 | static struct sysfs_dirent *sysfs_dir_pos(const void *ns, | ||
1045 | struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) | ||
1046 | { | ||
1047 | if (pos) { | ||
1048 | int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && | ||
1049 | pos->s_parent == parent_sd && | ||
1050 | hash == pos->s_hash; | ||
1051 | sysfs_put(pos); | ||
1052 | if (!valid) | ||
1053 | pos = NULL; | ||
1054 | } | ||
1055 | if (!pos && (hash > 1) && (hash < INT_MAX)) { | ||
1056 | struct rb_node *node = parent_sd->s_dir.children.rb_node; | ||
1057 | while (node) { | ||
1058 | pos = to_sysfs_dirent(node); | ||
1059 | |||
1060 | if (hash < pos->s_hash) | ||
1061 | node = node->rb_left; | ||
1062 | else if (hash > pos->s_hash) | ||
1063 | node = node->rb_right; | ||
1064 | else | ||
1065 | break; | ||
1066 | } | ||
1067 | } | ||
1068 | /* Skip over entries in the wrong namespace */ | ||
1069 | while (pos && pos->s_ns != ns) { | ||
1070 | struct rb_node *node = rb_next(&pos->s_rb); | ||
1071 | if (!node) | ||
1072 | pos = NULL; | ||
1073 | else | ||
1074 | pos = to_sysfs_dirent(node); | ||
1075 | } | ||
1076 | return pos; | ||
1077 | } | ||
1078 | |||
1079 | static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, | ||
1080 | struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) | ||
1081 | { | ||
1082 | pos = sysfs_dir_pos(ns, parent_sd, ino, pos); | ||
1083 | if (pos) | ||
1084 | do { | ||
1085 | struct rb_node *node = rb_next(&pos->s_rb); | ||
1086 | if (!node) | ||
1087 | pos = NULL; | ||
1088 | else | ||
1089 | pos = to_sysfs_dirent(node); | ||
1090 | } while (pos && pos->s_ns != ns); | ||
1091 | return pos; | ||
1092 | } | ||
1093 | |||
1094 | static int sysfs_readdir(struct file *file, struct dir_context *ctx) | ||
1095 | { | ||
1096 | struct dentry *dentry = file->f_path.dentry; | ||
1097 | struct sysfs_dirent *parent_sd = dentry->d_fsdata; | ||
1098 | struct sysfs_dirent *pos = file->private_data; | ||
1099 | enum kobj_ns_type type; | ||
1100 | const void *ns; | ||
1101 | |||
1102 | type = sysfs_ns_type(parent_sd); | ||
1103 | ns = sysfs_info(dentry->d_sb)->ns[type]; | ||
1104 | |||
1105 | if (!dir_emit_dots(file, ctx)) | ||
1106 | return 0; | ||
1107 | mutex_lock(&sysfs_mutex); | ||
1108 | for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); | ||
1109 | pos; | ||
1110 | pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { | ||
1111 | const char *name = pos->s_name; | ||
1112 | unsigned int type = dt_type(pos); | ||
1113 | int len = strlen(name); | ||
1114 | ino_t ino = pos->s_ino; | ||
1115 | ctx->pos = pos->s_hash; | ||
1116 | file->private_data = sysfs_get(pos); | ||
1117 | |||
1118 | mutex_unlock(&sysfs_mutex); | ||
1119 | if (!dir_emit(ctx, name, len, ino, type)) | ||
1120 | return 0; | ||
1121 | mutex_lock(&sysfs_mutex); | ||
1122 | } | ||
1123 | mutex_unlock(&sysfs_mutex); | ||
1124 | file->private_data = NULL; | ||
1125 | ctx->pos = INT_MAX; | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1129 | static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) | ||
1130 | { | ||
1131 | struct inode *inode = file_inode(file); | ||
1132 | loff_t ret; | ||
1133 | |||
1134 | mutex_lock(&inode->i_mutex); | ||
1135 | ret = generic_file_llseek(file, offset, whence); | ||
1136 | mutex_unlock(&inode->i_mutex); | ||
1137 | |||
1138 | return ret; | ||
1139 | } | ||
1140 | |||
1141 | const struct file_operations sysfs_dir_operations = { | ||
1142 | .read = generic_read_dir, | ||
1143 | .iterate = sysfs_readdir, | ||
1144 | .release = sysfs_dir_release, | ||
1145 | .llseek = sysfs_dir_llseek, | ||
1146 | }; | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b94f93685093..810cf6e613e5 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -14,70 +14,23 @@ | |||
14 | #include <linux/kobject.h> | 14 | #include <linux/kobject.h> |
15 | #include <linux/kallsyms.h> | 15 | #include <linux/kallsyms.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/fsnotify.h> | ||
18 | #include <linux/namei.h> | ||
19 | #include <linux/poll.h> | ||
20 | #include <linux/list.h> | 17 | #include <linux/list.h> |
21 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
22 | #include <linux/limits.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
25 | #include <linux/mm.h> | ||
26 | 20 | ||
27 | #include "sysfs.h" | 21 | #include "sysfs.h" |
22 | #include "../kernfs/kernfs-internal.h" | ||
28 | 23 | ||
29 | /* | 24 | /* |
30 | * There's one sysfs_open_file for each open file and one sysfs_open_dirent | 25 | * Determine ktype->sysfs_ops for the given kernfs_node. This function |
31 | * for each sysfs_dirent with one or more open files. | ||
32 | * | ||
33 | * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is | ||
34 | * protected by sysfs_open_dirent_lock. | ||
35 | * | ||
36 | * filp->private_data points to seq_file whose ->private points to | ||
37 | * sysfs_open_file. sysfs_open_files are chained at | ||
38 | * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. | ||
39 | */ | ||
40 | static DEFINE_SPINLOCK(sysfs_open_dirent_lock); | ||
41 | static DEFINE_MUTEX(sysfs_open_file_mutex); | ||
42 | |||
43 | struct sysfs_open_dirent { | ||
44 | atomic_t refcnt; | ||
45 | atomic_t event; | ||
46 | wait_queue_head_t poll; | ||
47 | struct list_head files; /* goes through sysfs_open_file.list */ | ||
48 | }; | ||
49 | |||
50 | struct sysfs_open_file { | ||
51 | struct sysfs_dirent *sd; | ||
52 | struct file *file; | ||
53 | struct mutex mutex; | ||
54 | int event; | ||
55 | struct list_head list; | ||
56 | |||
57 | bool mmapped; | ||
58 | const struct vm_operations_struct *vm_ops; | ||
59 | }; | ||
60 | |||
61 | static bool sysfs_is_bin(struct sysfs_dirent *sd) | ||
62 | { | ||
63 | return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR; | ||
64 | } | ||
65 | |||
66 | static struct sysfs_open_file *sysfs_of(struct file *file) | ||
67 | { | ||
68 | return ((struct seq_file *)file->private_data)->private; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * Determine ktype->sysfs_ops for the given sysfs_dirent. This function | ||
73 | * must be called while holding an active reference. | 26 | * must be called while holding an active reference. |
74 | */ | 27 | */ |
75 | static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) | 28 | static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) |
76 | { | 29 | { |
77 | struct kobject *kobj = sd->s_parent->s_dir.kobj; | 30 | struct kobject *kobj = kn->parent->priv; |
78 | 31 | ||
79 | if (!sysfs_ignore_lockdep(sd)) | 32 | if (kn->flags & KERNFS_LOCKDEP) |
80 | lockdep_assert_held(sd); | 33 | lockdep_assert_held(kn); |
81 | return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; | 34 | return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; |
82 | } | 35 | } |
83 | 36 | ||
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) | |||
86 | * details like buffering and seeking. The following function pipes | 39 | * details like buffering and seeking. The following function pipes |
87 | * sysfs_ops->show() result through seq_file. | 40 | * sysfs_ops->show() result through seq_file. |
88 | */ | 41 | */ |
89 | static int sysfs_seq_show(struct seq_file *sf, void *v) | 42 | static int sysfs_kf_seq_show(struct seq_file *sf, void *v) |
90 | { | 43 | { |
91 | struct sysfs_open_file *of = sf->private; | 44 | struct kernfs_open_file *of = sf->private; |
92 | struct kobject *kobj = of->sd->s_parent->s_dir.kobj; | 45 | struct kobject *kobj = of->kn->parent->priv; |
93 | const struct sysfs_ops *ops; | 46 | const struct sysfs_ops *ops = sysfs_file_ops(of->kn); |
94 | char *buf; | ||
95 | ssize_t count; | 47 | ssize_t count; |
48 | char *buf; | ||
96 | 49 | ||
97 | /* acquire buffer and ensure that it's >= PAGE_SIZE */ | 50 | /* acquire buffer and ensure that it's >= PAGE_SIZE */ |
98 | count = seq_get_buf(sf, &buf); | 51 | count = seq_get_buf(sf, &buf); |
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v) | |||
102 | } | 55 | } |
103 | 56 | ||
104 | /* | 57 | /* |
105 | * Need @of->sd for attr and ops, its parent for kobj. @of->mutex | 58 | * Invoke show(). Control may reach here via seq file lseek even |
106 | * nests outside active ref and is just to ensure that the ops | 59 | * if @ops->show() isn't implemented. |
107 | * aren't called concurrently for the same open file. | ||
108 | */ | 60 | */ |
109 | mutex_lock(&of->mutex); | 61 | if (ops->show) { |
110 | if (!sysfs_get_active(of->sd)) { | 62 | count = ops->show(kobj, of->kn->priv, buf); |
111 | mutex_unlock(&of->mutex); | 63 | if (count < 0) |
112 | return -ENODEV; | 64 | return count; |
113 | } | 65 | } |
114 | 66 | ||
115 | of->event = atomic_read(&of->sd->s_attr.open->event); | ||
116 | |||
117 | /* | ||
118 | * Lookup @ops and invoke show(). Control may reach here via seq | ||
119 | * file lseek even if @ops->show() isn't implemented. | ||
120 | */ | ||
121 | ops = sysfs_file_ops(of->sd); | ||
122 | if (ops->show) | ||
123 | count = ops->show(kobj, of->sd->s_attr.attr, buf); | ||
124 | else | ||
125 | count = 0; | ||
126 | |||
127 | sysfs_put_active(of->sd); | ||
128 | mutex_unlock(&of->mutex); | ||
129 | |||
130 | if (count < 0) | ||
131 | return count; | ||
132 | |||
133 | /* | 67 | /* |
134 | * The code works fine with PAGE_SIZE return but it's likely to | 68 | * The code works fine with PAGE_SIZE return but it's likely to |
135 | * indicate truncated result or overflow in normal use cases. | 69 | * indicate truncated result or overflow in normal use cases. |
@@ -144,728 +78,194 @@ static int sysfs_seq_show(struct seq_file *sf, void *v) | |||
144 | return 0; | 78 | return 0; |
145 | } | 79 | } |
146 | 80 | ||
147 | /* | 81 | static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, |
148 | * Read method for bin files. As reading a bin file can have side-effects, | 82 | size_t count, loff_t pos) |
149 | * the exact offset and bytes specified in read(2) call should be passed to | ||
150 | * the read callback making it difficult to use seq_file. Implement | ||
151 | * simplistic custom buffering for bin files. | ||
152 | */ | ||
153 | static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf, | ||
154 | size_t bytes, loff_t *off) | ||
155 | { | 83 | { |
156 | struct sysfs_open_file *of = sysfs_of(file); | 84 | struct bin_attribute *battr = of->kn->priv; |
157 | struct bin_attribute *battr = of->sd->s_attr.bin_attr; | 85 | struct kobject *kobj = of->kn->parent->priv; |
158 | struct kobject *kobj = of->sd->s_parent->s_dir.kobj; | 86 | loff_t size = file_inode(of->file)->i_size; |
159 | loff_t size = file_inode(file)->i_size; | ||
160 | int count = min_t(size_t, bytes, PAGE_SIZE); | ||
161 | loff_t offs = *off; | ||
162 | char *buf; | ||
163 | 87 | ||
164 | if (!bytes) | 88 | if (!count) |
165 | return 0; | 89 | return 0; |
166 | 90 | ||
167 | if (size) { | 91 | if (size) { |
168 | if (offs > size) | 92 | if (pos > size) |
169 | return 0; | 93 | return 0; |
170 | if (offs + count > size) | 94 | if (pos + count > size) |
171 | count = size - offs; | 95 | count = size - pos; |
172 | } | ||
173 | |||
174 | buf = kmalloc(count, GFP_KERNEL); | ||
175 | if (!buf) | ||
176 | return -ENOMEM; | ||
177 | |||
178 | /* need of->sd for battr, its parent for kobj */ | ||
179 | mutex_lock(&of->mutex); | ||
180 | if (!sysfs_get_active(of->sd)) { | ||
181 | count = -ENODEV; | ||
182 | mutex_unlock(&of->mutex); | ||
183 | goto out_free; | ||
184 | } | ||
185 | |||
186 | if (battr->read) | ||
187 | count = battr->read(file, kobj, battr, buf, offs, count); | ||
188 | else | ||
189 | count = -EIO; | ||
190 | |||
191 | sysfs_put_active(of->sd); | ||
192 | mutex_unlock(&of->mutex); | ||
193 | |||
194 | if (count < 0) | ||
195 | goto out_free; | ||
196 | |||
197 | if (copy_to_user(userbuf, buf, count)) { | ||
198 | count = -EFAULT; | ||
199 | goto out_free; | ||
200 | } | 96 | } |
201 | 97 | ||
202 | pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); | 98 | if (!battr->read) |
203 | 99 | return -EIO; | |
204 | *off = offs + count; | ||
205 | 100 | ||
206 | out_free: | 101 | return battr->read(of->file, kobj, battr, buf, pos, count); |
207 | kfree(buf); | ||
208 | return count; | ||
209 | } | 102 | } |
210 | 103 | ||
211 | /** | 104 | /* kernfs write callback for regular sysfs files */ |
212 | * flush_write_buffer - push buffer to kobject | 105 | static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf, |
213 | * @of: open file | 106 | size_t count, loff_t pos) |
214 | * @buf: data buffer for file | ||
215 | * @off: file offset to write to | ||
216 | * @count: number of bytes | ||
217 | * | ||
218 | * Get the correct pointers for the kobject and the attribute we're dealing | ||
219 | * with, then call the store() method for it with @buf. | ||
220 | */ | ||
221 | static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off, | ||
222 | size_t count) | ||
223 | { | 107 | { |
224 | struct kobject *kobj = of->sd->s_parent->s_dir.kobj; | 108 | const struct sysfs_ops *ops = sysfs_file_ops(of->kn); |
225 | int rc = 0; | 109 | struct kobject *kobj = of->kn->parent->priv; |
226 | |||
227 | /* | ||
228 | * Need @of->sd for attr and ops, its parent for kobj. @of->mutex | ||
229 | * nests outside active ref and is just to ensure that the ops | ||
230 | * aren't called concurrently for the same open file. | ||
231 | */ | ||
232 | mutex_lock(&of->mutex); | ||
233 | if (!sysfs_get_active(of->sd)) { | ||
234 | mutex_unlock(&of->mutex); | ||
235 | return -ENODEV; | ||
236 | } | ||
237 | 110 | ||
238 | if (sysfs_is_bin(of->sd)) { | 111 | if (!count) |
239 | struct bin_attribute *battr = of->sd->s_attr.bin_attr; | 112 | return 0; |
240 | |||
241 | rc = -EIO; | ||
242 | if (battr->write) | ||
243 | rc = battr->write(of->file, kobj, battr, buf, off, | ||
244 | count); | ||
245 | } else { | ||
246 | const struct sysfs_ops *ops = sysfs_file_ops(of->sd); | ||
247 | |||
248 | rc = ops->store(kobj, of->sd->s_attr.attr, buf, count); | ||
249 | } | ||
250 | |||
251 | sysfs_put_active(of->sd); | ||
252 | mutex_unlock(&of->mutex); | ||
253 | 113 | ||
254 | return rc; | 114 | return ops->store(kobj, of->kn->priv, buf, count); |
255 | } | 115 | } |
256 | 116 | ||
257 | /** | 117 | /* kernfs write callback for bin sysfs files */ |
258 | * sysfs_write_file - write an attribute | 118 | static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, |
259 | * @file: file pointer | 119 | size_t count, loff_t pos) |
260 | * @user_buf: data to write | ||
261 | * @count: number of bytes | ||
262 | * @ppos: starting offset | ||
263 | * | ||
264 | * Copy data in from userland and pass it to the matching | ||
265 | * sysfs_ops->store() by invoking flush_write_buffer(). | ||
266 | * | ||
267 | * There is no easy way for us to know if userspace is only doing a partial | ||
268 | * write, so we don't support them. We expect the entire buffer to come on | ||
269 | * the first write. Hint: if you're writing a value, first read the file, | ||
270 | * modify only the the value you're changing, then write entire buffer | ||
271 | * back. | ||
272 | */ | ||
273 | static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf, | ||
274 | size_t count, loff_t *ppos) | ||
275 | { | 120 | { |
276 | struct sysfs_open_file *of = sysfs_of(file); | 121 | struct bin_attribute *battr = of->kn->priv; |
277 | ssize_t len = min_t(size_t, count, PAGE_SIZE); | 122 | struct kobject *kobj = of->kn->parent->priv; |
278 | loff_t size = file_inode(file)->i_size; | 123 | loff_t size = file_inode(of->file)->i_size; |
279 | char *buf; | ||
280 | 124 | ||
281 | if (sysfs_is_bin(of->sd) && size) { | 125 | if (size) { |
282 | if (size <= *ppos) | 126 | if (size <= pos) |
283 | return 0; | 127 | return 0; |
284 | len = min_t(ssize_t, len, size - *ppos); | 128 | count = min_t(ssize_t, count, size - pos); |
285 | } | 129 | } |
286 | 130 | if (!count) | |
287 | if (!len) | ||
288 | return 0; | 131 | return 0; |
289 | 132 | ||
290 | buf = kmalloc(len + 1, GFP_KERNEL); | 133 | if (!battr->write) |
291 | if (!buf) | 134 | return -EIO; |
292 | return -ENOMEM; | ||
293 | 135 | ||
294 | if (copy_from_user(buf, user_buf, len)) { | 136 | return battr->write(of->file, kobj, battr, buf, pos, count); |
295 | len = -EFAULT; | ||
296 | goto out_free; | ||
297 | } | ||
298 | buf[len] = '\0'; /* guarantee string termination */ | ||
299 | |||
300 | len = flush_write_buffer(of, buf, *ppos, len); | ||
301 | if (len > 0) | ||
302 | *ppos += len; | ||
303 | out_free: | ||
304 | kfree(buf); | ||
305 | return len; | ||
306 | } | ||
307 | |||
308 | static void sysfs_bin_vma_open(struct vm_area_struct *vma) | ||
309 | { | ||
310 | struct file *file = vma->vm_file; | ||
311 | struct sysfs_open_file *of = sysfs_of(file); | ||
312 | |||
313 | if (!of->vm_ops) | ||
314 | return; | ||
315 | |||
316 | if (!sysfs_get_active(of->sd)) | ||
317 | return; | ||
318 | |||
319 | if (of->vm_ops->open) | ||
320 | of->vm_ops->open(vma); | ||
321 | |||
322 | sysfs_put_active(of->sd); | ||
323 | } | 137 | } |
324 | 138 | ||
325 | static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 139 | static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, |
140 | struct vm_area_struct *vma) | ||
326 | { | 141 | { |
327 | struct file *file = vma->vm_file; | 142 | struct bin_attribute *battr = of->kn->priv; |
328 | struct sysfs_open_file *of = sysfs_of(file); | 143 | struct kobject *kobj = of->kn->parent->priv; |
329 | int ret; | ||
330 | 144 | ||
331 | if (!of->vm_ops) | 145 | return battr->mmap(of->file, kobj, battr, vma); |
332 | return VM_FAULT_SIGBUS; | ||
333 | |||
334 | if (!sysfs_get_active(of->sd)) | ||
335 | return VM_FAULT_SIGBUS; | ||
336 | |||
337 | ret = VM_FAULT_SIGBUS; | ||
338 | if (of->vm_ops->fault) | ||
339 | ret = of->vm_ops->fault(vma, vmf); | ||
340 | |||
341 | sysfs_put_active(of->sd); | ||
342 | return ret; | ||
343 | } | 146 | } |
344 | 147 | ||
345 | static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, | 148 | void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) |
346 | struct vm_fault *vmf) | ||
347 | { | 149 | { |
348 | struct file *file = vma->vm_file; | 150 | struct kernfs_node *kn = kobj->sd, *tmp; |
349 | struct sysfs_open_file *of = sysfs_of(file); | ||
350 | int ret; | ||
351 | |||
352 | if (!of->vm_ops) | ||
353 | return VM_FAULT_SIGBUS; | ||
354 | 151 | ||
355 | if (!sysfs_get_active(of->sd)) | 152 | if (kn && dir) |
356 | return VM_FAULT_SIGBUS; | 153 | kn = kernfs_find_and_get(kn, dir); |
357 | |||
358 | ret = 0; | ||
359 | if (of->vm_ops->page_mkwrite) | ||
360 | ret = of->vm_ops->page_mkwrite(vma, vmf); | ||
361 | else | 154 | else |
362 | file_update_time(file); | 155 | kernfs_get(kn); |
363 | |||
364 | sysfs_put_active(of->sd); | ||
365 | return ret; | ||
366 | } | ||
367 | |||
368 | static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr, | ||
369 | void *buf, int len, int write) | ||
370 | { | ||
371 | struct file *file = vma->vm_file; | ||
372 | struct sysfs_open_file *of = sysfs_of(file); | ||
373 | int ret; | ||
374 | |||
375 | if (!of->vm_ops) | ||
376 | return -EINVAL; | ||
377 | |||
378 | if (!sysfs_get_active(of->sd)) | ||
379 | return -EINVAL; | ||
380 | |||
381 | ret = -EINVAL; | ||
382 | if (of->vm_ops->access) | ||
383 | ret = of->vm_ops->access(vma, addr, buf, len, write); | ||
384 | |||
385 | sysfs_put_active(of->sd); | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | #ifdef CONFIG_NUMA | ||
390 | static int sysfs_bin_set_policy(struct vm_area_struct *vma, | ||
391 | struct mempolicy *new) | ||
392 | { | ||
393 | struct file *file = vma->vm_file; | ||
394 | struct sysfs_open_file *of = sysfs_of(file); | ||
395 | int ret; | ||
396 | |||
397 | if (!of->vm_ops) | ||
398 | return 0; | ||
399 | |||
400 | if (!sysfs_get_active(of->sd)) | ||
401 | return -EINVAL; | ||
402 | |||
403 | ret = 0; | ||
404 | if (of->vm_ops->set_policy) | ||
405 | ret = of->vm_ops->set_policy(vma, new); | ||
406 | |||
407 | sysfs_put_active(of->sd); | ||
408 | return ret; | ||
409 | } | ||
410 | |||
411 | static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma, | ||
412 | unsigned long addr) | ||
413 | { | ||
414 | struct file *file = vma->vm_file; | ||
415 | struct sysfs_open_file *of = sysfs_of(file); | ||
416 | struct mempolicy *pol; | ||
417 | |||
418 | if (!of->vm_ops) | ||
419 | return vma->vm_policy; | ||
420 | |||
421 | if (!sysfs_get_active(of->sd)) | ||
422 | return vma->vm_policy; | ||
423 | |||
424 | pol = vma->vm_policy; | ||
425 | if (of->vm_ops->get_policy) | ||
426 | pol = of->vm_ops->get_policy(vma, addr); | ||
427 | |||
428 | sysfs_put_active(of->sd); | ||
429 | return pol; | ||
430 | } | ||
431 | |||
432 | static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, | ||
433 | const nodemask_t *to, unsigned long flags) | ||
434 | { | ||
435 | struct file *file = vma->vm_file; | ||
436 | struct sysfs_open_file *of = sysfs_of(file); | ||
437 | int ret; | ||
438 | |||
439 | if (!of->vm_ops) | ||
440 | return 0; | ||
441 | |||
442 | if (!sysfs_get_active(of->sd)) | ||
443 | return 0; | ||
444 | |||
445 | ret = 0; | ||
446 | if (of->vm_ops->migrate) | ||
447 | ret = of->vm_ops->migrate(vma, from, to, flags); | ||
448 | |||
449 | sysfs_put_active(of->sd); | ||
450 | return ret; | ||
451 | } | ||
452 | #endif | ||
453 | |||
454 | static const struct vm_operations_struct sysfs_bin_vm_ops = { | ||
455 | .open = sysfs_bin_vma_open, | ||
456 | .fault = sysfs_bin_fault, | ||
457 | .page_mkwrite = sysfs_bin_page_mkwrite, | ||
458 | .access = sysfs_bin_access, | ||
459 | #ifdef CONFIG_NUMA | ||
460 | .set_policy = sysfs_bin_set_policy, | ||
461 | .get_policy = sysfs_bin_get_policy, | ||
462 | .migrate = sysfs_bin_migrate, | ||
463 | #endif | ||
464 | }; | ||
465 | |||
466 | static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma) | ||
467 | { | ||
468 | struct sysfs_open_file *of = sysfs_of(file); | ||
469 | struct bin_attribute *battr = of->sd->s_attr.bin_attr; | ||
470 | struct kobject *kobj = of->sd->s_parent->s_dir.kobj; | ||
471 | int rc; | ||
472 | |||
473 | mutex_lock(&of->mutex); | ||
474 | |||
475 | /* need of->sd for battr, its parent for kobj */ | ||
476 | rc = -ENODEV; | ||
477 | if (!sysfs_get_active(of->sd)) | ||
478 | goto out_unlock; | ||
479 | |||
480 | if (!battr->mmap) | ||
481 | goto out_put; | ||
482 | |||
483 | rc = battr->mmap(file, kobj, battr, vma); | ||
484 | if (rc) | ||
485 | goto out_put; | ||
486 | |||
487 | /* | ||
488 | * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() | ||
489 | * to satisfy versions of X which crash if the mmap fails: that | ||
490 | * substitutes a new vm_file, and we don't then want bin_vm_ops. | ||
491 | */ | ||
492 | if (vma->vm_file != file) | ||
493 | goto out_put; | ||
494 | |||
495 | rc = -EINVAL; | ||
496 | if (of->mmapped && of->vm_ops != vma->vm_ops) | ||
497 | goto out_put; | ||
498 | 156 | ||
499 | /* | 157 | if (kn && attr) { |
500 | * It is not possible to successfully wrap close. | 158 | tmp = kernfs_find_and_get(kn, attr); |
501 | * So error if someone is trying to use close. | 159 | kernfs_put(kn); |
502 | */ | 160 | kn = tmp; |
503 | rc = -EINVAL; | ||
504 | if (vma->vm_ops && vma->vm_ops->close) | ||
505 | goto out_put; | ||
506 | |||
507 | rc = 0; | ||
508 | of->mmapped = 1; | ||
509 | of->vm_ops = vma->vm_ops; | ||
510 | vma->vm_ops = &sysfs_bin_vm_ops; | ||
511 | out_put: | ||
512 | sysfs_put_active(of->sd); | ||
513 | out_unlock: | ||
514 | mutex_unlock(&of->mutex); | ||
515 | |||
516 | return rc; | ||
517 | } | ||
518 | |||
519 | /** | ||
520 | * sysfs_get_open_dirent - get or create sysfs_open_dirent | ||
521 | * @sd: target sysfs_dirent | ||
522 | * @of: sysfs_open_file for this instance of open | ||
523 | * | ||
524 | * If @sd->s_attr.open exists, increment its reference count; | ||
525 | * otherwise, create one. @of is chained to the files list. | ||
526 | * | ||
527 | * LOCKING: | ||
528 | * Kernel thread context (may sleep). | ||
529 | * | ||
530 | * RETURNS: | ||
531 | * 0 on success, -errno on failure. | ||
532 | */ | ||
533 | static int sysfs_get_open_dirent(struct sysfs_dirent *sd, | ||
534 | struct sysfs_open_file *of) | ||
535 | { | ||
536 | struct sysfs_open_dirent *od, *new_od = NULL; | ||
537 | |||
538 | retry: | ||
539 | mutex_lock(&sysfs_open_file_mutex); | ||
540 | spin_lock_irq(&sysfs_open_dirent_lock); | ||
541 | |||
542 | if (!sd->s_attr.open && new_od) { | ||
543 | sd->s_attr.open = new_od; | ||
544 | new_od = NULL; | ||
545 | } | 161 | } |
546 | 162 | ||
547 | od = sd->s_attr.open; | 163 | if (kn) { |
548 | if (od) { | 164 | kernfs_notify(kn); |
549 | atomic_inc(&od->refcnt); | 165 | kernfs_put(kn); |
550 | list_add_tail(&of->list, &od->files); | ||
551 | } | ||
552 | |||
553 | spin_unlock_irq(&sysfs_open_dirent_lock); | ||
554 | mutex_unlock(&sysfs_open_file_mutex); | ||
555 | |||
556 | if (od) { | ||
557 | kfree(new_od); | ||
558 | return 0; | ||
559 | } | 166 | } |
167 | } | ||
168 | EXPORT_SYMBOL_GPL(sysfs_notify); | ||
560 | 169 | ||
561 | /* not there, initialize a new one and retry */ | 170 | static const struct kernfs_ops sysfs_file_kfops_empty = { |
562 | new_od = kmalloc(sizeof(*new_od), GFP_KERNEL); | 171 | }; |
563 | if (!new_od) | ||
564 | return -ENOMEM; | ||
565 | 172 | ||
566 | atomic_set(&new_od->refcnt, 0); | 173 | static const struct kernfs_ops sysfs_file_kfops_ro = { |
567 | atomic_set(&new_od->event, 1); | 174 | .seq_show = sysfs_kf_seq_show, |
568 | init_waitqueue_head(&new_od->poll); | 175 | }; |
569 | INIT_LIST_HEAD(&new_od->files); | ||
570 | goto retry; | ||
571 | } | ||
572 | 176 | ||
573 | /** | 177 | static const struct kernfs_ops sysfs_file_kfops_wo = { |
574 | * sysfs_put_open_dirent - put sysfs_open_dirent | 178 | .write = sysfs_kf_write, |
575 | * @sd: target sysfs_dirent | 179 | }; |
576 | * @of: associated sysfs_open_file | ||
577 | * | ||
578 | * Put @sd->s_attr.open and unlink @of from the files list. If | ||
579 | * reference count reaches zero, disassociate and free it. | ||
580 | * | ||
581 | * LOCKING: | ||
582 | * None. | ||
583 | */ | ||
584 | static void sysfs_put_open_dirent(struct sysfs_dirent *sd, | ||
585 | struct sysfs_open_file *of) | ||
586 | { | ||
587 | struct sysfs_open_dirent *od = sd->s_attr.open; | ||
588 | unsigned long flags; | ||
589 | 180 | ||
590 | mutex_lock(&sysfs_open_file_mutex); | 181 | static const struct kernfs_ops sysfs_file_kfops_rw = { |
591 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); | 182 | .seq_show = sysfs_kf_seq_show, |
183 | .write = sysfs_kf_write, | ||
184 | }; | ||
592 | 185 | ||
593 | if (of) | 186 | static const struct kernfs_ops sysfs_bin_kfops_ro = { |
594 | list_del(&of->list); | 187 | .read = sysfs_kf_bin_read, |
188 | }; | ||
595 | 189 | ||
596 | if (atomic_dec_and_test(&od->refcnt)) | 190 | static const struct kernfs_ops sysfs_bin_kfops_wo = { |
597 | sd->s_attr.open = NULL; | 191 | .write = sysfs_kf_bin_write, |
598 | else | 192 | }; |
599 | od = NULL; | ||
600 | 193 | ||
601 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); | 194 | static const struct kernfs_ops sysfs_bin_kfops_rw = { |
602 | mutex_unlock(&sysfs_open_file_mutex); | 195 | .read = sysfs_kf_bin_read, |
196 | .write = sysfs_kf_bin_write, | ||
197 | }; | ||
603 | 198 | ||
604 | kfree(od); | 199 | static const struct kernfs_ops sysfs_bin_kfops_mmap = { |
605 | } | 200 | .read = sysfs_kf_bin_read, |
201 | .write = sysfs_kf_bin_write, | ||
202 | .mmap = sysfs_kf_bin_mmap, | ||
203 | }; | ||
606 | 204 | ||
607 | static int sysfs_open_file(struct inode *inode, struct file *file) | 205 | int sysfs_add_file_mode_ns(struct kernfs_node *parent, |
206 | const struct attribute *attr, bool is_bin, | ||
207 | umode_t mode, const void *ns) | ||
608 | { | 208 | { |
609 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 209 | struct lock_class_key *key = NULL; |
610 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 210 | const struct kernfs_ops *ops; |
611 | struct sysfs_open_file *of; | 211 | struct kernfs_node *kn; |
612 | bool has_read, has_write, has_mmap; | 212 | loff_t size; |
613 | int error = -EACCES; | ||
614 | |||
615 | /* need attr_sd for attr and ops, its parent for kobj */ | ||
616 | if (!sysfs_get_active(attr_sd)) | ||
617 | return -ENODEV; | ||
618 | 213 | ||
619 | if (sysfs_is_bin(attr_sd)) { | 214 | if (!is_bin) { |
620 | struct bin_attribute *battr = attr_sd->s_attr.bin_attr; | 215 | struct kobject *kobj = parent->priv; |
621 | 216 | const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; | |
622 | has_read = battr->read || battr->mmap; | ||
623 | has_write = battr->write || battr->mmap; | ||
624 | has_mmap = battr->mmap; | ||
625 | } else { | ||
626 | const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); | ||
627 | 217 | ||
628 | /* every kobject with an attribute needs a ktype assigned */ | 218 | /* every kobject with an attribute needs a ktype assigned */ |
629 | if (WARN(!ops, KERN_ERR | 219 | if (WARN(!sysfs_ops, KERN_ERR |
630 | "missing sysfs attribute operations for kobject: %s\n", | 220 | "missing sysfs attribute operations for kobject: %s\n", |
631 | kobject_name(kobj))) | 221 | kobject_name(kobj))) |
632 | goto err_out; | 222 | return -EINVAL; |
633 | 223 | ||
634 | has_read = ops->show; | 224 | if (sysfs_ops->show && sysfs_ops->store) |
635 | has_write = ops->store; | 225 | ops = &sysfs_file_kfops_rw; |
636 | has_mmap = false; | 226 | else if (sysfs_ops->show) |
637 | } | 227 | ops = &sysfs_file_kfops_ro; |
638 | 228 | else if (sysfs_ops->store) | |
639 | /* check perms and supported operations */ | 229 | ops = &sysfs_file_kfops_wo; |
640 | if ((file->f_mode & FMODE_WRITE) && | 230 | else |
641 | (!(inode->i_mode & S_IWUGO) || !has_write)) | 231 | ops = &sysfs_file_kfops_empty; |
642 | goto err_out; | 232 | |
643 | 233 | size = PAGE_SIZE; | |
644 | if ((file->f_mode & FMODE_READ) && | 234 | } else { |
645 | (!(inode->i_mode & S_IRUGO) || !has_read)) | 235 | struct bin_attribute *battr = (void *)attr; |
646 | goto err_out; | 236 | |
647 | 237 | if (battr->mmap) | |
648 | /* allocate a sysfs_open_file for the file */ | 238 | ops = &sysfs_bin_kfops_mmap; |
649 | error = -ENOMEM; | 239 | else if (battr->read && battr->write) |
650 | of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); | 240 | ops = &sysfs_bin_kfops_rw; |
651 | if (!of) | 241 | else if (battr->read) |
652 | goto err_out; | 242 | ops = &sysfs_bin_kfops_ro; |
653 | 243 | else if (battr->write) | |
654 | /* | 244 | ops = &sysfs_bin_kfops_wo; |
655 | * The following is done to give a different lockdep key to | 245 | else |
656 | * @of->mutex for files which implement mmap. This is a rather | 246 | ops = &sysfs_file_kfops_empty; |
657 | * crude way to avoid false positive lockdep warning around | 247 | |
658 | * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and | 248 | size = battr->size; |
659 | * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under | ||
660 | * which mm->mmap_sem nests, while holding @of->mutex. As each | ||
661 | * open file has a separate mutex, it's okay as long as those don't | ||
662 | * happen on the same file. At this point, we can't easily give | ||
663 | * each file a separate locking class. Let's differentiate on | ||
664 | * whether the file has mmap or not for now. | ||
665 | */ | ||
666 | if (has_mmap) | ||
667 | mutex_init(&of->mutex); | ||
668 | else | ||
669 | mutex_init(&of->mutex); | ||
670 | |||
671 | of->sd = attr_sd; | ||
672 | of->file = file; | ||
673 | |||
674 | /* | ||
675 | * Always instantiate seq_file even if read access doesn't use | ||
676 | * seq_file or is not requested. This unifies private data access | ||
677 | * and readable regular files are the vast majority anyway. | ||
678 | */ | ||
679 | if (sysfs_is_bin(attr_sd)) | ||
680 | error = single_open(file, NULL, of); | ||
681 | else | ||
682 | error = single_open(file, sysfs_seq_show, of); | ||
683 | if (error) | ||
684 | goto err_free; | ||
685 | |||
686 | /* seq_file clears PWRITE unconditionally, restore it if WRITE */ | ||
687 | if (file->f_mode & FMODE_WRITE) | ||
688 | file->f_mode |= FMODE_PWRITE; | ||
689 | |||
690 | /* make sure we have open dirent struct */ | ||
691 | error = sysfs_get_open_dirent(attr_sd, of); | ||
692 | if (error) | ||
693 | goto err_close; | ||
694 | |||
695 | /* open succeeded, put active references */ | ||
696 | sysfs_put_active(attr_sd); | ||
697 | return 0; | ||
698 | |||
699 | err_close: | ||
700 | single_release(inode, file); | ||
701 | err_free: | ||
702 | kfree(of); | ||
703 | err_out: | ||
704 | sysfs_put_active(attr_sd); | ||
705 | return error; | ||
706 | } | ||
707 | |||
708 | static int sysfs_release(struct inode *inode, struct file *filp) | ||
709 | { | ||
710 | struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; | ||
711 | struct sysfs_open_file *of = sysfs_of(filp); | ||
712 | |||
713 | sysfs_put_open_dirent(sd, of); | ||
714 | single_release(inode, filp); | ||
715 | kfree(of); | ||
716 | |||
717 | return 0; | ||
718 | } | ||
719 | |||
720 | void sysfs_unmap_bin_file(struct sysfs_dirent *sd) | ||
721 | { | ||
722 | struct sysfs_open_dirent *od; | ||
723 | struct sysfs_open_file *of; | ||
724 | |||
725 | if (!sysfs_is_bin(sd)) | ||
726 | return; | ||
727 | |||
728 | spin_lock_irq(&sysfs_open_dirent_lock); | ||
729 | od = sd->s_attr.open; | ||
730 | if (od) | ||
731 | atomic_inc(&od->refcnt); | ||
732 | spin_unlock_irq(&sysfs_open_dirent_lock); | ||
733 | if (!od) | ||
734 | return; | ||
735 | |||
736 | mutex_lock(&sysfs_open_file_mutex); | ||
737 | list_for_each_entry(of, &od->files, list) { | ||
738 | struct inode *inode = file_inode(of->file); | ||
739 | unmap_mapping_range(inode->i_mapping, 0, 0, 1); | ||
740 | } | 249 | } |
741 | mutex_unlock(&sysfs_open_file_mutex); | ||
742 | |||
743 | sysfs_put_open_dirent(sd, NULL); | ||
744 | } | ||
745 | |||
746 | /* Sysfs attribute files are pollable. The idea is that you read | ||
747 | * the content and then you use 'poll' or 'select' to wait for | ||
748 | * the content to change. When the content changes (assuming the | ||
749 | * manager for the kobject supports notification), poll will | ||
750 | * return POLLERR|POLLPRI, and select will return the fd whether | ||
751 | * it is waiting for read, write, or exceptions. | ||
752 | * Once poll/select indicates that the value has changed, you | ||
753 | * need to close and re-open the file, or seek to 0 and read again. | ||
754 | * Reminder: this only works for attributes which actively support | ||
755 | * it, and it is not possible to test an attribute from userspace | ||
756 | * to see if it supports poll (Neither 'poll' nor 'select' return | ||
757 | * an appropriate error code). When in doubt, set a suitable timeout value. | ||
758 | */ | ||
759 | static unsigned int sysfs_poll(struct file *filp, poll_table *wait) | ||
760 | { | ||
761 | struct sysfs_open_file *of = sysfs_of(filp); | ||
762 | struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; | ||
763 | struct sysfs_open_dirent *od = attr_sd->s_attr.open; | ||
764 | |||
765 | /* need parent for the kobj, grab both */ | ||
766 | if (!sysfs_get_active(attr_sd)) | ||
767 | goto trigger; | ||
768 | |||
769 | poll_wait(filp, &od->poll, wait); | ||
770 | 250 | ||
771 | sysfs_put_active(attr_sd); | 251 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
772 | 252 | if (!attr->ignore_lockdep) | |
773 | if (of->event != atomic_read(&od->event)) | 253 | key = attr->key ?: (struct lock_class_key *)&attr->skey; |
774 | goto trigger; | 254 | #endif |
775 | 255 | kn = __kernfs_create_file(parent, attr->name, mode, size, ops, | |
776 | return DEFAULT_POLLMASK; | 256 | (void *)attr, ns, true, key); |
777 | 257 | if (IS_ERR(kn)) { | |
778 | trigger: | 258 | if (PTR_ERR(kn) == -EEXIST) |
779 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; | 259 | sysfs_warn_dup(parent, attr->name); |
780 | } | 260 | return PTR_ERR(kn); |
781 | |||
782 | void sysfs_notify_dirent(struct sysfs_dirent *sd) | ||
783 | { | ||
784 | struct sysfs_open_dirent *od; | ||
785 | unsigned long flags; | ||
786 | |||
787 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); | ||
788 | |||
789 | if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) { | ||
790 | od = sd->s_attr.open; | ||
791 | if (od) { | ||
792 | atomic_inc(&od->event); | ||
793 | wake_up_interruptible(&od->poll); | ||
794 | } | ||
795 | } | 261 | } |
796 | 262 | return 0; | |
797 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); | ||
798 | } | ||
799 | EXPORT_SYMBOL_GPL(sysfs_notify_dirent); | ||
800 | |||
801 | void sysfs_notify(struct kobject *k, const char *dir, const char *attr) | ||
802 | { | ||
803 | struct sysfs_dirent *sd = k->sd; | ||
804 | |||
805 | mutex_lock(&sysfs_mutex); | ||
806 | |||
807 | if (sd && dir) | ||
808 | sd = sysfs_find_dirent(sd, dir, NULL); | ||
809 | if (sd && attr) | ||
810 | sd = sysfs_find_dirent(sd, attr, NULL); | ||
811 | if (sd) | ||
812 | sysfs_notify_dirent(sd); | ||
813 | |||
814 | mutex_unlock(&sysfs_mutex); | ||
815 | } | ||
816 | EXPORT_SYMBOL_GPL(sysfs_notify); | ||
817 | |||
818 | const struct file_operations sysfs_file_operations = { | ||
819 | .read = seq_read, | ||
820 | .write = sysfs_write_file, | ||
821 | .llseek = generic_file_llseek, | ||
822 | .open = sysfs_open_file, | ||
823 | .release = sysfs_release, | ||
824 | .poll = sysfs_poll, | ||
825 | }; | ||
826 | |||
827 | const struct file_operations sysfs_bin_operations = { | ||
828 | .read = sysfs_bin_read, | ||
829 | .write = sysfs_write_file, | ||
830 | .llseek = generic_file_llseek, | ||
831 | .mmap = sysfs_bin_mmap, | ||
832 | .open = sysfs_open_file, | ||
833 | .release = sysfs_release, | ||
834 | .poll = sysfs_poll, | ||
835 | }; | ||
836 | |||
837 | int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, | ||
838 | const struct attribute *attr, int type, | ||
839 | umode_t amode, const void *ns) | ||
840 | { | ||
841 | umode_t mode = (amode & S_IALLUGO) | S_IFREG; | ||
842 | struct sysfs_addrm_cxt acxt; | ||
843 | struct sysfs_dirent *sd; | ||
844 | int rc; | ||
845 | |||
846 | sd = sysfs_new_dirent(attr->name, mode, type); | ||
847 | if (!sd) | ||
848 | return -ENOMEM; | ||
849 | |||
850 | sd->s_ns = ns; | ||
851 | sd->s_attr.attr = (void *)attr; | ||
852 | sysfs_dirent_init_lockdep(sd); | ||
853 | |||
854 | sysfs_addrm_start(&acxt); | ||
855 | rc = sysfs_add_one(&acxt, sd, dir_sd); | ||
856 | sysfs_addrm_finish(&acxt); | ||
857 | |||
858 | if (rc) | ||
859 | sysfs_put(sd); | ||
860 | |||
861 | return rc; | ||
862 | } | 263 | } |
863 | 264 | ||
864 | 265 | int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, | |
865 | int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, | 266 | bool is_bin) |
866 | int type) | ||
867 | { | 267 | { |
868 | return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); | 268 | return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL); |
869 | } | 269 | } |
870 | 270 | ||
871 | /** | 271 | /** |
@@ -879,8 +279,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, | |||
879 | { | 279 | { |
880 | BUG_ON(!kobj || !kobj->sd || !attr); | 280 | BUG_ON(!kobj || !kobj->sd || !attr); |
881 | 281 | ||
882 | return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, | 282 | return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns); |
883 | attr->mode, ns); | ||
884 | 283 | ||
885 | } | 284 | } |
886 | EXPORT_SYMBOL_GPL(sysfs_create_file_ns); | 285 | EXPORT_SYMBOL_GPL(sysfs_create_file_ns); |
@@ -908,19 +307,21 @@ EXPORT_SYMBOL_GPL(sysfs_create_files); | |||
908 | int sysfs_add_file_to_group(struct kobject *kobj, | 307 | int sysfs_add_file_to_group(struct kobject *kobj, |
909 | const struct attribute *attr, const char *group) | 308 | const struct attribute *attr, const char *group) |
910 | { | 309 | { |
911 | struct sysfs_dirent *dir_sd; | 310 | struct kernfs_node *parent; |
912 | int error; | 311 | int error; |
913 | 312 | ||
914 | if (group) | 313 | if (group) { |
915 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 314 | parent = kernfs_find_and_get(kobj->sd, group); |
916 | else | 315 | } else { |
917 | dir_sd = sysfs_get(kobj->sd); | 316 | parent = kobj->sd; |
317 | kernfs_get(parent); | ||
318 | } | ||
918 | 319 | ||
919 | if (!dir_sd) | 320 | if (!parent) |
920 | return -ENOENT; | 321 | return -ENOENT; |
921 | 322 | ||
922 | error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); | 323 | error = sysfs_add_file(parent, attr, false); |
923 | sysfs_put(dir_sd); | 324 | kernfs_put(parent); |
924 | 325 | ||
925 | return error; | 326 | return error; |
926 | } | 327 | } |
@@ -936,23 +337,20 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); | |||
936 | int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, | 337 | int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, |
937 | umode_t mode) | 338 | umode_t mode) |
938 | { | 339 | { |
939 | struct sysfs_dirent *sd; | 340 | struct kernfs_node *kn; |
940 | struct iattr newattrs; | 341 | struct iattr newattrs; |
941 | int rc; | 342 | int rc; |
942 | 343 | ||
943 | mutex_lock(&sysfs_mutex); | 344 | kn = kernfs_find_and_get(kobj->sd, attr->name); |
944 | 345 | if (!kn) | |
945 | rc = -ENOENT; | 346 | return -ENOENT; |
946 | sd = sysfs_find_dirent(kobj->sd, attr->name, NULL); | ||
947 | if (!sd) | ||
948 | goto out; | ||
949 | 347 | ||
950 | newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO); | 348 | newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO); |
951 | newattrs.ia_valid = ATTR_MODE; | 349 | newattrs.ia_valid = ATTR_MODE; |
952 | rc = sysfs_sd_setattr(sd, &newattrs); | ||
953 | 350 | ||
954 | out: | 351 | rc = kernfs_setattr(kn, &newattrs); |
955 | mutex_unlock(&sysfs_mutex); | 352 | |
353 | kernfs_put(kn); | ||
956 | return rc; | 354 | return rc; |
957 | } | 355 | } |
958 | EXPORT_SYMBOL_GPL(sysfs_chmod_file); | 356 | EXPORT_SYMBOL_GPL(sysfs_chmod_file); |
@@ -968,9 +366,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); | |||
968 | void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, | 366 | void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, |
969 | const void *ns) | 367 | const void *ns) |
970 | { | 368 | { |
971 | struct sysfs_dirent *dir_sd = kobj->sd; | 369 | struct kernfs_node *parent = kobj->sd; |
972 | 370 | ||
973 | sysfs_hash_and_remove(dir_sd, attr->name, ns); | 371 | kernfs_remove_by_name_ns(parent, attr->name, ns); |
974 | } | 372 | } |
975 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); | 373 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); |
976 | 374 | ||
@@ -991,15 +389,18 @@ EXPORT_SYMBOL_GPL(sysfs_remove_files); | |||
991 | void sysfs_remove_file_from_group(struct kobject *kobj, | 389 | void sysfs_remove_file_from_group(struct kobject *kobj, |
992 | const struct attribute *attr, const char *group) | 390 | const struct attribute *attr, const char *group) |
993 | { | 391 | { |
994 | struct sysfs_dirent *dir_sd; | 392 | struct kernfs_node *parent; |
995 | 393 | ||
996 | if (group) | 394 | if (group) { |
997 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 395 | parent = kernfs_find_and_get(kobj->sd, group); |
998 | else | 396 | } else { |
999 | dir_sd = sysfs_get(kobj->sd); | 397 | parent = kobj->sd; |
1000 | if (dir_sd) { | 398 | kernfs_get(parent); |
1001 | sysfs_hash_and_remove(dir_sd, attr->name, NULL); | 399 | } |
1002 | sysfs_put(dir_sd); | 400 | |
401 | if (parent) { | ||
402 | kernfs_remove_by_name(parent, attr->name); | ||
403 | kernfs_put(parent); | ||
1003 | } | 404 | } |
1004 | } | 405 | } |
1005 | EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); | 406 | EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); |
@@ -1014,7 +415,7 @@ int sysfs_create_bin_file(struct kobject *kobj, | |||
1014 | { | 415 | { |
1015 | BUG_ON(!kobj || !kobj->sd || !attr); | 416 | BUG_ON(!kobj || !kobj->sd || !attr); |
1016 | 417 | ||
1017 | return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); | 418 | return sysfs_add_file(kobj->sd, &attr->attr, true); |
1018 | } | 419 | } |
1019 | EXPORT_SYMBOL_GPL(sysfs_create_bin_file); | 420 | EXPORT_SYMBOL_GPL(sysfs_create_bin_file); |
1020 | 421 | ||
@@ -1026,7 +427,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file); | |||
1026 | void sysfs_remove_bin_file(struct kobject *kobj, | 427 | void sysfs_remove_bin_file(struct kobject *kobj, |
1027 | const struct bin_attribute *attr) | 428 | const struct bin_attribute *attr) |
1028 | { | 429 | { |
1029 | sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); | 430 | kernfs_remove_by_name(kobj->sd, attr->attr.name); |
1030 | } | 431 | } |
1031 | EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); | 432 | EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); |
1032 | 433 | ||
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 1898a10e38ce..6b579387c67a 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include "sysfs.h" | 18 | #include "sysfs.h" |
19 | 19 | ||
20 | 20 | ||
21 | static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | 21 | static void remove_files(struct kernfs_node *parent, struct kobject *kobj, |
22 | const struct attribute_group *grp) | 22 | const struct attribute_group *grp) |
23 | { | 23 | { |
24 | struct attribute *const *attr; | 24 | struct attribute *const *attr; |
@@ -26,13 +26,13 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
26 | 26 | ||
27 | if (grp->attrs) | 27 | if (grp->attrs) |
28 | for (attr = grp->attrs; *attr; attr++) | 28 | for (attr = grp->attrs; *attr; attr++) |
29 | sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); | 29 | kernfs_remove_by_name(parent, (*attr)->name); |
30 | if (grp->bin_attrs) | 30 | if (grp->bin_attrs) |
31 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) | 31 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) |
32 | sysfs_remove_bin_file(kobj, *bin_attr); | 32 | sysfs_remove_bin_file(kobj, *bin_attr); |
33 | } | 33 | } |
34 | 34 | ||
35 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | 35 | static int create_files(struct kernfs_node *parent, struct kobject *kobj, |
36 | const struct attribute_group *grp, int update) | 36 | const struct attribute_group *grp, int update) |
37 | { | 37 | { |
38 | struct attribute *const *attr; | 38 | struct attribute *const *attr; |
@@ -49,22 +49,20 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
49 | * re-adding (if required) the file. | 49 | * re-adding (if required) the file. |
50 | */ | 50 | */ |
51 | if (update) | 51 | if (update) |
52 | sysfs_hash_and_remove(dir_sd, (*attr)->name, | 52 | kernfs_remove_by_name(parent, (*attr)->name); |
53 | NULL); | ||
54 | if (grp->is_visible) { | 53 | if (grp->is_visible) { |
55 | mode = grp->is_visible(kobj, *attr, i); | 54 | mode = grp->is_visible(kobj, *attr, i); |
56 | if (!mode) | 55 | if (!mode) |
57 | continue; | 56 | continue; |
58 | } | 57 | } |
59 | error = sysfs_add_file_mode_ns(dir_sd, *attr, | 58 | error = sysfs_add_file_mode_ns(parent, *attr, false, |
60 | SYSFS_KOBJ_ATTR, | ||
61 | (*attr)->mode | mode, | 59 | (*attr)->mode | mode, |
62 | NULL); | 60 | NULL); |
63 | if (unlikely(error)) | 61 | if (unlikely(error)) |
64 | break; | 62 | break; |
65 | } | 63 | } |
66 | if (error) { | 64 | if (error) { |
67 | remove_files(dir_sd, kobj, grp); | 65 | remove_files(parent, kobj, grp); |
68 | goto exit; | 66 | goto exit; |
69 | } | 67 | } |
70 | } | 68 | } |
@@ -78,7 +76,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
78 | break; | 76 | break; |
79 | } | 77 | } |
80 | if (error) | 78 | if (error) |
81 | remove_files(dir_sd, kobj, grp); | 79 | remove_files(parent, kobj, grp); |
82 | } | 80 | } |
83 | exit: | 81 | exit: |
84 | return error; | 82 | return error; |
@@ -88,7 +86,7 @@ exit: | |||
88 | static int internal_create_group(struct kobject *kobj, int update, | 86 | static int internal_create_group(struct kobject *kobj, int update, |
89 | const struct attribute_group *grp) | 87 | const struct attribute_group *grp) |
90 | { | 88 | { |
91 | struct sysfs_dirent *sd; | 89 | struct kernfs_node *kn; |
92 | int error; | 90 | int error; |
93 | 91 | ||
94 | BUG_ON(!kobj || (!update && !kobj->sd)); | 92 | BUG_ON(!kobj || (!update && !kobj->sd)); |
@@ -102,18 +100,22 @@ static int internal_create_group(struct kobject *kobj, int update, | |||
102 | return -EINVAL; | 100 | return -EINVAL; |
103 | } | 101 | } |
104 | if (grp->name) { | 102 | if (grp->name) { |
105 | error = sysfs_create_subdir(kobj, grp->name, &sd); | 103 | kn = kernfs_create_dir(kobj->sd, grp->name, |
106 | if (error) | 104 | S_IRWXU | S_IRUGO | S_IXUGO, kobj); |
107 | return error; | 105 | if (IS_ERR(kn)) { |
106 | if (PTR_ERR(kn) == -EEXIST) | ||
107 | sysfs_warn_dup(kobj->sd, grp->name); | ||
108 | return PTR_ERR(kn); | ||
109 | } | ||
108 | } else | 110 | } else |
109 | sd = kobj->sd; | 111 | kn = kobj->sd; |
110 | sysfs_get(sd); | 112 | kernfs_get(kn); |
111 | error = create_files(sd, kobj, grp, update); | 113 | error = create_files(kn, kobj, grp, update); |
112 | if (error) { | 114 | if (error) { |
113 | if (grp->name) | 115 | if (grp->name) |
114 | sysfs_remove(sd); | 116 | kernfs_remove(kn); |
115 | } | 117 | } |
116 | sysfs_put(sd); | 118 | kernfs_put(kn); |
117 | return error; | 119 | return error; |
118 | } | 120 | } |
119 | 121 | ||
@@ -203,25 +205,27 @@ EXPORT_SYMBOL_GPL(sysfs_update_group); | |||
203 | void sysfs_remove_group(struct kobject *kobj, | 205 | void sysfs_remove_group(struct kobject *kobj, |
204 | const struct attribute_group *grp) | 206 | const struct attribute_group *grp) |
205 | { | 207 | { |
206 | struct sysfs_dirent *dir_sd = kobj->sd; | 208 | struct kernfs_node *parent = kobj->sd; |
207 | struct sysfs_dirent *sd; | 209 | struct kernfs_node *kn; |
208 | 210 | ||
209 | if (grp->name) { | 211 | if (grp->name) { |
210 | sd = sysfs_get_dirent(dir_sd, grp->name); | 212 | kn = kernfs_find_and_get(parent, grp->name); |
211 | if (!sd) { | 213 | if (!kn) { |
212 | WARN(!sd, KERN_WARNING | 214 | WARN(!kn, KERN_WARNING |
213 | "sysfs group %p not found for kobject '%s'\n", | 215 | "sysfs group %p not found for kobject '%s'\n", |
214 | grp, kobject_name(kobj)); | 216 | grp, kobject_name(kobj)); |
215 | return; | 217 | return; |
216 | } | 218 | } |
217 | } else | 219 | } else { |
218 | sd = sysfs_get(dir_sd); | 220 | kn = parent; |
221 | kernfs_get(kn); | ||
222 | } | ||
219 | 223 | ||
220 | remove_files(sd, kobj, grp); | 224 | remove_files(kn, kobj, grp); |
221 | if (grp->name) | 225 | if (grp->name) |
222 | sysfs_remove(sd); | 226 | kernfs_remove(kn); |
223 | 227 | ||
224 | sysfs_put(sd); | 228 | kernfs_put(kn); |
225 | } | 229 | } |
226 | EXPORT_SYMBOL_GPL(sysfs_remove_group); | 230 | EXPORT_SYMBOL_GPL(sysfs_remove_group); |
227 | 231 | ||
@@ -257,22 +261,22 @@ EXPORT_SYMBOL_GPL(sysfs_remove_groups); | |||
257 | int sysfs_merge_group(struct kobject *kobj, | 261 | int sysfs_merge_group(struct kobject *kobj, |
258 | const struct attribute_group *grp) | 262 | const struct attribute_group *grp) |
259 | { | 263 | { |
260 | struct sysfs_dirent *dir_sd; | 264 | struct kernfs_node *parent; |
261 | int error = 0; | 265 | int error = 0; |
262 | struct attribute *const *attr; | 266 | struct attribute *const *attr; |
263 | int i; | 267 | int i; |
264 | 268 | ||
265 | dir_sd = sysfs_get_dirent(kobj->sd, grp->name); | 269 | parent = kernfs_find_and_get(kobj->sd, grp->name); |
266 | if (!dir_sd) | 270 | if (!parent) |
267 | return -ENOENT; | 271 | return -ENOENT; |
268 | 272 | ||
269 | for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) | 273 | for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) |
270 | error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); | 274 | error = sysfs_add_file(parent, *attr, false); |
271 | if (error) { | 275 | if (error) { |
272 | while (--i >= 0) | 276 | while (--i >= 0) |
273 | sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); | 277 | kernfs_remove_by_name(parent, (*--attr)->name); |
274 | } | 278 | } |
275 | sysfs_put(dir_sd); | 279 | kernfs_put(parent); |
276 | 280 | ||
277 | return error; | 281 | return error; |
278 | } | 282 | } |
@@ -286,14 +290,14 @@ EXPORT_SYMBOL_GPL(sysfs_merge_group); | |||
286 | void sysfs_unmerge_group(struct kobject *kobj, | 290 | void sysfs_unmerge_group(struct kobject *kobj, |
287 | const struct attribute_group *grp) | 291 | const struct attribute_group *grp) |
288 | { | 292 | { |
289 | struct sysfs_dirent *dir_sd; | 293 | struct kernfs_node *parent; |
290 | struct attribute *const *attr; | 294 | struct attribute *const *attr; |
291 | 295 | ||
292 | dir_sd = sysfs_get_dirent(kobj->sd, grp->name); | 296 | parent = kernfs_find_and_get(kobj->sd, grp->name); |
293 | if (dir_sd) { | 297 | if (parent) { |
294 | for (attr = grp->attrs; *attr; ++attr) | 298 | for (attr = grp->attrs; *attr; ++attr) |
295 | sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); | 299 | kernfs_remove_by_name(parent, (*attr)->name); |
296 | sysfs_put(dir_sd); | 300 | kernfs_put(parent); |
297 | } | 301 | } |
298 | } | 302 | } |
299 | EXPORT_SYMBOL_GPL(sysfs_unmerge_group); | 303 | EXPORT_SYMBOL_GPL(sysfs_unmerge_group); |
@@ -308,15 +312,15 @@ EXPORT_SYMBOL_GPL(sysfs_unmerge_group); | |||
308 | int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, | 312 | int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, |
309 | struct kobject *target, const char *link_name) | 313 | struct kobject *target, const char *link_name) |
310 | { | 314 | { |
311 | struct sysfs_dirent *dir_sd; | 315 | struct kernfs_node *parent; |
312 | int error = 0; | 316 | int error = 0; |
313 | 317 | ||
314 | dir_sd = sysfs_get_dirent(kobj->sd, group_name); | 318 | parent = kernfs_find_and_get(kobj->sd, group_name); |
315 | if (!dir_sd) | 319 | if (!parent) |
316 | return -ENOENT; | 320 | return -ENOENT; |
317 | 321 | ||
318 | error = sysfs_create_link_sd(dir_sd, target, link_name); | 322 | error = sysfs_create_link_sd(parent, target, link_name); |
319 | sysfs_put(dir_sd); | 323 | kernfs_put(parent); |
320 | 324 | ||
321 | return error; | 325 | return error; |
322 | } | 326 | } |
@@ -331,12 +335,12 @@ EXPORT_SYMBOL_GPL(sysfs_add_link_to_group); | |||
331 | void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, | 335 | void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, |
332 | const char *link_name) | 336 | const char *link_name) |
333 | { | 337 | { |
334 | struct sysfs_dirent *dir_sd; | 338 | struct kernfs_node *parent; |
335 | 339 | ||
336 | dir_sd = sysfs_get_dirent(kobj->sd, group_name); | 340 | parent = kernfs_find_and_get(kobj->sd, group_name); |
337 | if (dir_sd) { | 341 | if (parent) { |
338 | sysfs_hash_and_remove(dir_sd, link_name, NULL); | 342 | kernfs_remove_by_name(parent, link_name); |
339 | sysfs_put(dir_sd); | 343 | kernfs_put(parent); |
340 | } | 344 | } |
341 | } | 345 | } |
342 | EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); | 346 | EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c deleted file mode 100644 index 1750f790af3b..000000000000 --- a/fs/sysfs/inode.c +++ /dev/null | |||
@@ -1,331 +0,0 @@ | |||
1 | /* | ||
2 | * fs/sysfs/inode.c - basic sysfs inode and dentry operations | ||
3 | * | ||
4 | * Copyright (c) 2001-3 Patrick Mochel | ||
5 | * Copyright (c) 2007 SUSE Linux Products GmbH | ||
6 | * Copyright (c) 2007 Tejun Heo <teheo@suse.de> | ||
7 | * | ||
8 | * This file is released under the GPLv2. | ||
9 | * | ||
10 | * Please see Documentation/filesystems/sysfs.txt for more information. | ||
11 | */ | ||
12 | |||
13 | #undef DEBUG | ||
14 | |||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/namei.h> | ||
17 | #include <linux/backing-dev.h> | ||
18 | #include <linux/capability.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/sysfs.h> | ||
23 | #include <linux/xattr.h> | ||
24 | #include <linux/security.h> | ||
25 | #include "sysfs.h" | ||
26 | |||
27 | static const struct address_space_operations sysfs_aops = { | ||
28 | .readpage = simple_readpage, | ||
29 | .write_begin = simple_write_begin, | ||
30 | .write_end = simple_write_end, | ||
31 | }; | ||
32 | |||
33 | static struct backing_dev_info sysfs_backing_dev_info = { | ||
34 | .name = "sysfs", | ||
35 | .ra_pages = 0, /* No readahead */ | ||
36 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
37 | }; | ||
38 | |||
39 | static const struct inode_operations sysfs_inode_operations = { | ||
40 | .permission = sysfs_permission, | ||
41 | .setattr = sysfs_setattr, | ||
42 | .getattr = sysfs_getattr, | ||
43 | .setxattr = sysfs_setxattr, | ||
44 | }; | ||
45 | |||
46 | int __init sysfs_inode_init(void) | ||
47 | { | ||
48 | return bdi_init(&sysfs_backing_dev_info); | ||
49 | } | ||
50 | |||
51 | static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) | ||
52 | { | ||
53 | struct sysfs_inode_attrs *attrs; | ||
54 | struct iattr *iattrs; | ||
55 | |||
56 | attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); | ||
57 | if (!attrs) | ||
58 | return NULL; | ||
59 | iattrs = &attrs->ia_iattr; | ||
60 | |||
61 | /* assign default attributes */ | ||
62 | iattrs->ia_mode = sd->s_mode; | ||
63 | iattrs->ia_uid = GLOBAL_ROOT_UID; | ||
64 | iattrs->ia_gid = GLOBAL_ROOT_GID; | ||
65 | iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; | ||
66 | |||
67 | return attrs; | ||
68 | } | ||
69 | |||
70 | int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr) | ||
71 | { | ||
72 | struct sysfs_inode_attrs *sd_attrs; | ||
73 | struct iattr *iattrs; | ||
74 | unsigned int ia_valid = iattr->ia_valid; | ||
75 | |||
76 | sd_attrs = sd->s_iattr; | ||
77 | |||
78 | if (!sd_attrs) { | ||
79 | /* setting attributes for the first time, allocate now */ | ||
80 | sd_attrs = sysfs_init_inode_attrs(sd); | ||
81 | if (!sd_attrs) | ||
82 | return -ENOMEM; | ||
83 | sd->s_iattr = sd_attrs; | ||
84 | } | ||
85 | /* attributes were changed at least once in past */ | ||
86 | iattrs = &sd_attrs->ia_iattr; | ||
87 | |||
88 | if (ia_valid & ATTR_UID) | ||
89 | iattrs->ia_uid = iattr->ia_uid; | ||
90 | if (ia_valid & ATTR_GID) | ||
91 | iattrs->ia_gid = iattr->ia_gid; | ||
92 | if (ia_valid & ATTR_ATIME) | ||
93 | iattrs->ia_atime = iattr->ia_atime; | ||
94 | if (ia_valid & ATTR_MTIME) | ||
95 | iattrs->ia_mtime = iattr->ia_mtime; | ||
96 | if (ia_valid & ATTR_CTIME) | ||
97 | iattrs->ia_ctime = iattr->ia_ctime; | ||
98 | if (ia_valid & ATTR_MODE) { | ||
99 | umode_t mode = iattr->ia_mode; | ||
100 | iattrs->ia_mode = sd->s_mode = mode; | ||
101 | } | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
106 | { | ||
107 | struct inode *inode = dentry->d_inode; | ||
108 | struct sysfs_dirent *sd = dentry->d_fsdata; | ||
109 | int error; | ||
110 | |||
111 | if (!sd) | ||
112 | return -EINVAL; | ||
113 | |||
114 | mutex_lock(&sysfs_mutex); | ||
115 | error = inode_change_ok(inode, iattr); | ||
116 | if (error) | ||
117 | goto out; | ||
118 | |||
119 | error = sysfs_sd_setattr(sd, iattr); | ||
120 | if (error) | ||
121 | goto out; | ||
122 | |||
123 | /* this ignores size changes */ | ||
124 | setattr_copy(inode, iattr); | ||
125 | |||
126 | out: | ||
127 | mutex_unlock(&sysfs_mutex); | ||
128 | return error; | ||
129 | } | ||
130 | |||
131 | static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, | ||
132 | u32 *secdata_len) | ||
133 | { | ||
134 | struct sysfs_inode_attrs *iattrs; | ||
135 | void *old_secdata; | ||
136 | size_t old_secdata_len; | ||
137 | |||
138 | if (!sd->s_iattr) { | ||
139 | sd->s_iattr = sysfs_init_inode_attrs(sd); | ||
140 | if (!sd->s_iattr) | ||
141 | return -ENOMEM; | ||
142 | } | ||
143 | |||
144 | iattrs = sd->s_iattr; | ||
145 | old_secdata = iattrs->ia_secdata; | ||
146 | old_secdata_len = iattrs->ia_secdata_len; | ||
147 | |||
148 | iattrs->ia_secdata = *secdata; | ||
149 | iattrs->ia_secdata_len = *secdata_len; | ||
150 | |||
151 | *secdata = old_secdata; | ||
152 | *secdata_len = old_secdata_len; | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
157 | size_t size, int flags) | ||
158 | { | ||
159 | struct sysfs_dirent *sd = dentry->d_fsdata; | ||
160 | void *secdata; | ||
161 | int error; | ||
162 | u32 secdata_len = 0; | ||
163 | |||
164 | if (!sd) | ||
165 | return -EINVAL; | ||
166 | |||
167 | if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { | ||
168 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; | ||
169 | error = security_inode_setsecurity(dentry->d_inode, suffix, | ||
170 | value, size, flags); | ||
171 | if (error) | ||
172 | goto out; | ||
173 | error = security_inode_getsecctx(dentry->d_inode, | ||
174 | &secdata, &secdata_len); | ||
175 | if (error) | ||
176 | goto out; | ||
177 | |||
178 | mutex_lock(&sysfs_mutex); | ||
179 | error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len); | ||
180 | mutex_unlock(&sysfs_mutex); | ||
181 | |||
182 | if (secdata) | ||
183 | security_release_secctx(secdata, secdata_len); | ||
184 | } else | ||
185 | return -EINVAL; | ||
186 | out: | ||
187 | return error; | ||
188 | } | ||
189 | |||
190 | static inline void set_default_inode_attr(struct inode *inode, umode_t mode) | ||
191 | { | ||
192 | inode->i_mode = mode; | ||
193 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
194 | } | ||
195 | |||
196 | static inline void set_inode_attr(struct inode *inode, struct iattr *iattr) | ||
197 | { | ||
198 | inode->i_uid = iattr->ia_uid; | ||
199 | inode->i_gid = iattr->ia_gid; | ||
200 | inode->i_atime = iattr->ia_atime; | ||
201 | inode->i_mtime = iattr->ia_mtime; | ||
202 | inode->i_ctime = iattr->ia_ctime; | ||
203 | } | ||
204 | |||
205 | static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) | ||
206 | { | ||
207 | struct sysfs_inode_attrs *iattrs = sd->s_iattr; | ||
208 | |||
209 | inode->i_mode = sd->s_mode; | ||
210 | if (iattrs) { | ||
211 | /* sysfs_dirent has non-default attributes | ||
212 | * get them from persistent copy in sysfs_dirent | ||
213 | */ | ||
214 | set_inode_attr(inode, &iattrs->ia_iattr); | ||
215 | security_inode_notifysecctx(inode, | ||
216 | iattrs->ia_secdata, | ||
217 | iattrs->ia_secdata_len); | ||
218 | } | ||
219 | |||
220 | if (sysfs_type(sd) == SYSFS_DIR) | ||
221 | set_nlink(inode, sd->s_dir.subdirs + 2); | ||
222 | } | ||
223 | |||
224 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
225 | struct kstat *stat) | ||
226 | { | ||
227 | struct sysfs_dirent *sd = dentry->d_fsdata; | ||
228 | struct inode *inode = dentry->d_inode; | ||
229 | |||
230 | mutex_lock(&sysfs_mutex); | ||
231 | sysfs_refresh_inode(sd, inode); | ||
232 | mutex_unlock(&sysfs_mutex); | ||
233 | |||
234 | generic_fillattr(inode, stat); | ||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) | ||
239 | { | ||
240 | struct bin_attribute *bin_attr; | ||
241 | |||
242 | inode->i_private = sysfs_get(sd); | ||
243 | inode->i_mapping->a_ops = &sysfs_aops; | ||
244 | inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; | ||
245 | inode->i_op = &sysfs_inode_operations; | ||
246 | |||
247 | set_default_inode_attr(inode, sd->s_mode); | ||
248 | sysfs_refresh_inode(sd, inode); | ||
249 | |||
250 | /* initialize inode according to type */ | ||
251 | switch (sysfs_type(sd)) { | ||
252 | case SYSFS_DIR: | ||
253 | inode->i_op = &sysfs_dir_inode_operations; | ||
254 | inode->i_fop = &sysfs_dir_operations; | ||
255 | break; | ||
256 | case SYSFS_KOBJ_ATTR: | ||
257 | inode->i_size = PAGE_SIZE; | ||
258 | inode->i_fop = &sysfs_file_operations; | ||
259 | break; | ||
260 | case SYSFS_KOBJ_BIN_ATTR: | ||
261 | bin_attr = sd->s_attr.bin_attr; | ||
262 | inode->i_size = bin_attr->size; | ||
263 | inode->i_fop = &sysfs_bin_operations; | ||
264 | break; | ||
265 | case SYSFS_KOBJ_LINK: | ||
266 | inode->i_op = &sysfs_symlink_inode_operations; | ||
267 | break; | ||
268 | default: | ||
269 | BUG(); | ||
270 | } | ||
271 | |||
272 | unlock_new_inode(inode); | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * sysfs_get_inode - get inode for sysfs_dirent | ||
277 | * @sb: super block | ||
278 | * @sd: sysfs_dirent to allocate inode for | ||
279 | * | ||
280 | * Get inode for @sd. If such inode doesn't exist, a new inode | ||
281 | * is allocated and basics are initialized. New inode is | ||
282 | * returned locked. | ||
283 | * | ||
284 | * LOCKING: | ||
285 | * Kernel thread context (may sleep). | ||
286 | * | ||
287 | * RETURNS: | ||
288 | * Pointer to allocated inode on success, NULL on failure. | ||
289 | */ | ||
290 | struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd) | ||
291 | { | ||
292 | struct inode *inode; | ||
293 | |||
294 | inode = iget_locked(sb, sd->s_ino); | ||
295 | if (inode && (inode->i_state & I_NEW)) | ||
296 | sysfs_init_inode(sd, inode); | ||
297 | |||
298 | return inode; | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * The sysfs_dirent serves as both an inode and a directory entry for sysfs. | ||
303 | * To prevent the sysfs inode numbers from being freed prematurely we take a | ||
304 | * reference to sysfs_dirent from the sysfs inode. A | ||
305 | * super_operations.evict_inode() implementation is needed to drop that | ||
306 | * reference upon inode destruction. | ||
307 | */ | ||
308 | void sysfs_evict_inode(struct inode *inode) | ||
309 | { | ||
310 | struct sysfs_dirent *sd = inode->i_private; | ||
311 | |||
312 | truncate_inode_pages(&inode->i_data, 0); | ||
313 | clear_inode(inode); | ||
314 | sysfs_put(sd); | ||
315 | } | ||
316 | |||
317 | int sysfs_permission(struct inode *inode, int mask) | ||
318 | { | ||
319 | struct sysfs_dirent *sd; | ||
320 | |||
321 | if (mask & MAY_NOT_BLOCK) | ||
322 | return -ECHILD; | ||
323 | |||
324 | sd = inode->i_private; | ||
325 | |||
326 | mutex_lock(&sysfs_mutex); | ||
327 | sysfs_refresh_inode(sd, inode); | ||
328 | mutex_unlock(&sysfs_mutex); | ||
329 | |||
330 | return generic_permission(inode, mask); | ||
331 | } | ||
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 834ec2cdb7a3..6211230814fd 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -14,146 +14,41 @@ | |||
14 | 14 | ||
15 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
17 | #include <linux/pagemap.h> | ||
18 | #include <linux/init.h> | 17 | #include <linux/init.h> |
19 | #include <linux/module.h> | ||
20 | #include <linux/magic.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
23 | 19 | ||
24 | #include "sysfs.h" | 20 | #include "sysfs.h" |
25 | 21 | ||
26 | 22 | static struct kernfs_root *sysfs_root; | |
27 | static struct vfsmount *sysfs_mnt; | 23 | struct kernfs_node *sysfs_root_kn; |
28 | struct kmem_cache *sysfs_dir_cachep; | ||
29 | |||
30 | static const struct super_operations sysfs_ops = { | ||
31 | .statfs = simple_statfs, | ||
32 | .drop_inode = generic_delete_inode, | ||
33 | .evict_inode = sysfs_evict_inode, | ||
34 | }; | ||
35 | |||
36 | struct sysfs_dirent sysfs_root = { | ||
37 | .s_name = "", | ||
38 | .s_count = ATOMIC_INIT(1), | ||
39 | .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), | ||
40 | .s_mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
41 | .s_ino = 1, | ||
42 | }; | ||
43 | |||
44 | static int sysfs_fill_super(struct super_block *sb, void *data, int silent) | ||
45 | { | ||
46 | struct inode *inode; | ||
47 | struct dentry *root; | ||
48 | |||
49 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
50 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
51 | sb->s_magic = SYSFS_MAGIC; | ||
52 | sb->s_op = &sysfs_ops; | ||
53 | sb->s_time_gran = 1; | ||
54 | |||
55 | /* get root inode, initialize and unlock it */ | ||
56 | mutex_lock(&sysfs_mutex); | ||
57 | inode = sysfs_get_inode(sb, &sysfs_root); | ||
58 | mutex_unlock(&sysfs_mutex); | ||
59 | if (!inode) { | ||
60 | pr_debug("sysfs: could not get root inode\n"); | ||
61 | return -ENOMEM; | ||
62 | } | ||
63 | |||
64 | /* instantiate and link root dentry */ | ||
65 | root = d_make_root(inode); | ||
66 | if (!root) { | ||
67 | pr_debug("%s: could not get root dentry!\n", __func__); | ||
68 | return -ENOMEM; | ||
69 | } | ||
70 | root->d_fsdata = &sysfs_root; | ||
71 | sb->s_root = root; | ||
72 | sb->s_d_op = &sysfs_dentry_ops; | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static int sysfs_test_super(struct super_block *sb, void *data) | ||
77 | { | ||
78 | struct sysfs_super_info *sb_info = sysfs_info(sb); | ||
79 | struct sysfs_super_info *info = data; | ||
80 | enum kobj_ns_type type; | ||
81 | int found = 1; | ||
82 | |||
83 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { | ||
84 | if (sb_info->ns[type] != info->ns[type]) | ||
85 | found = 0; | ||
86 | } | ||
87 | return found; | ||
88 | } | ||
89 | |||
90 | static int sysfs_set_super(struct super_block *sb, void *data) | ||
91 | { | ||
92 | int error; | ||
93 | error = set_anon_super(sb, data); | ||
94 | if (!error) | ||
95 | sb->s_fs_info = data; | ||
96 | return error; | ||
97 | } | ||
98 | |||
99 | static void free_sysfs_super_info(struct sysfs_super_info *info) | ||
100 | { | ||
101 | int type; | ||
102 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | ||
103 | kobj_ns_drop(type, info->ns[type]); | ||
104 | kfree(info); | ||
105 | } | ||
106 | 24 | ||
107 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, | 25 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, |
108 | int flags, const char *dev_name, void *data) | 26 | int flags, const char *dev_name, void *data) |
109 | { | 27 | { |
110 | struct sysfs_super_info *info; | 28 | struct dentry *root; |
111 | enum kobj_ns_type type; | 29 | void *ns; |
112 | struct super_block *sb; | ||
113 | int error; | ||
114 | 30 | ||
115 | if (!(flags & MS_KERNMOUNT)) { | 31 | if (!(flags & MS_KERNMOUNT)) { |
116 | if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) | 32 | if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) |
117 | return ERR_PTR(-EPERM); | 33 | return ERR_PTR(-EPERM); |
118 | 34 | ||
119 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { | 35 | if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) |
120 | if (!kobj_ns_current_may_mount(type)) | 36 | return ERR_PTR(-EPERM); |
121 | return ERR_PTR(-EPERM); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
126 | if (!info) | ||
127 | return ERR_PTR(-ENOMEM); | ||
128 | |||
129 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | ||
130 | info->ns[type] = kobj_ns_grab_current(type); | ||
131 | |||
132 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); | ||
133 | if (IS_ERR(sb) || sb->s_fs_info != info) | ||
134 | free_sysfs_super_info(info); | ||
135 | if (IS_ERR(sb)) | ||
136 | return ERR_CAST(sb); | ||
137 | if (!sb->s_root) { | ||
138 | error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); | ||
139 | if (error) { | ||
140 | deactivate_locked_super(sb); | ||
141 | return ERR_PTR(error); | ||
142 | } | ||
143 | sb->s_flags |= MS_ACTIVE; | ||
144 | } | 37 | } |
145 | 38 | ||
146 | return dget(sb->s_root); | 39 | ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); |
40 | root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); | ||
41 | if (IS_ERR(root)) | ||
42 | kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); | ||
43 | return root; | ||
147 | } | 44 | } |
148 | 45 | ||
149 | static void sysfs_kill_sb(struct super_block *sb) | 46 | static void sysfs_kill_sb(struct super_block *sb) |
150 | { | 47 | { |
151 | struct sysfs_super_info *info = sysfs_info(sb); | 48 | void *ns = (void *)kernfs_super_ns(sb); |
152 | /* Remove the superblock from fs_supers/s_instances | 49 | |
153 | * so we can't find it, before freeing sysfs_super_info. | 50 | kernfs_kill_sb(sb); |
154 | */ | 51 | kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); |
155 | kill_anon_super(sb); | ||
156 | free_sysfs_super_info(info); | ||
157 | } | 52 | } |
158 | 53 | ||
159 | static struct file_system_type sysfs_fs_type = { | 54 | static struct file_system_type sysfs_fs_type = { |
@@ -165,48 +60,19 @@ static struct file_system_type sysfs_fs_type = { | |||
165 | 60 | ||
166 | int __init sysfs_init(void) | 61 | int __init sysfs_init(void) |
167 | { | 62 | { |
168 | int err = -ENOMEM; | 63 | int err; |
169 | 64 | ||
170 | sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", | 65 | sysfs_root = kernfs_create_root(NULL, NULL); |
171 | sizeof(struct sysfs_dirent), | 66 | if (IS_ERR(sysfs_root)) |
172 | 0, 0, NULL); | 67 | return PTR_ERR(sysfs_root); |
173 | if (!sysfs_dir_cachep) | ||
174 | goto out; | ||
175 | 68 | ||
176 | err = sysfs_inode_init(); | 69 | sysfs_root_kn = sysfs_root->kn; |
177 | if (err) | ||
178 | goto out_err; | ||
179 | 70 | ||
180 | err = register_filesystem(&sysfs_fs_type); | 71 | err = register_filesystem(&sysfs_fs_type); |
181 | if (!err) { | 72 | if (err) { |
182 | sysfs_mnt = kern_mount(&sysfs_fs_type); | 73 | kernfs_destroy_root(sysfs_root); |
183 | if (IS_ERR(sysfs_mnt)) { | 74 | return err; |
184 | printk(KERN_ERR "sysfs: could not mount!\n"); | 75 | } |
185 | err = PTR_ERR(sysfs_mnt); | ||
186 | sysfs_mnt = NULL; | ||
187 | unregister_filesystem(&sysfs_fs_type); | ||
188 | goto out_err; | ||
189 | } | ||
190 | } else | ||
191 | goto out_err; | ||
192 | out: | ||
193 | return err; | ||
194 | out_err: | ||
195 | kmem_cache_destroy(sysfs_dir_cachep); | ||
196 | sysfs_dir_cachep = NULL; | ||
197 | goto out; | ||
198 | } | ||
199 | |||
200 | #undef sysfs_get | ||
201 | struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | ||
202 | { | ||
203 | return __sysfs_get(sd); | ||
204 | } | ||
205 | EXPORT_SYMBOL_GPL(sysfs_get); | ||
206 | 76 | ||
207 | #undef sysfs_put | 77 | return 0; |
208 | void sysfs_put(struct sysfs_dirent *sd) | ||
209 | { | ||
210 | __sysfs_put(sd); | ||
211 | } | 78 | } |
212 | EXPORT_SYMBOL_GPL(sysfs_put); | ||
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3ae3f1bf1a09..aecb15f84557 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -11,109 +11,73 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/gfp.h> | ||
15 | #include <linux/mount.h> | ||
16 | #include <linux/module.h> | 14 | #include <linux/module.h> |
17 | #include <linux/kobject.h> | 15 | #include <linux/kobject.h> |
18 | #include <linux/namei.h> | ||
19 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
20 | #include <linux/security.h> | 17 | #include <linux/security.h> |
21 | 18 | ||
22 | #include "sysfs.h" | 19 | #include "sysfs.h" |
23 | 20 | ||
24 | static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, | 21 | static int sysfs_do_create_link_sd(struct kernfs_node *parent, |
25 | struct kobject *target, | 22 | struct kobject *target_kobj, |
26 | const char *name, int warn) | 23 | const char *name, int warn) |
27 | { | 24 | { |
28 | struct sysfs_dirent *target_sd = NULL; | 25 | struct kernfs_node *kn, *target = NULL; |
29 | struct sysfs_dirent *sd = NULL; | ||
30 | struct sysfs_addrm_cxt acxt; | ||
31 | enum kobj_ns_type ns_type; | ||
32 | int error; | ||
33 | 26 | ||
34 | BUG_ON(!name || !parent_sd); | 27 | BUG_ON(!name || !parent); |
35 | 28 | ||
36 | /* | 29 | /* |
37 | * We don't own @target and it may be removed at any time. | 30 | * We don't own @target_kobj and it may be removed at any time. |
38 | * Synchronize using sysfs_symlink_target_lock. See | 31 | * Synchronize using sysfs_symlink_target_lock. See |
39 | * sysfs_remove_dir() for details. | 32 | * sysfs_remove_dir() for details. |
40 | */ | 33 | */ |
41 | spin_lock(&sysfs_symlink_target_lock); | 34 | spin_lock(&sysfs_symlink_target_lock); |
42 | if (target->sd) | 35 | if (target_kobj->sd) { |
43 | target_sd = sysfs_get(target->sd); | 36 | target = target_kobj->sd; |
37 | kernfs_get(target); | ||
38 | } | ||
44 | spin_unlock(&sysfs_symlink_target_lock); | 39 | spin_unlock(&sysfs_symlink_target_lock); |
45 | 40 | ||
46 | error = -ENOENT; | 41 | if (!target) |
47 | if (!target_sd) | 42 | return -ENOENT; |
48 | goto out_put; | ||
49 | |||
50 | error = -ENOMEM; | ||
51 | sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); | ||
52 | if (!sd) | ||
53 | goto out_put; | ||
54 | 43 | ||
55 | ns_type = sysfs_ns_type(parent_sd); | 44 | kn = kernfs_create_link(parent, name, target); |
56 | if (ns_type) | 45 | kernfs_put(target); |
57 | sd->s_ns = target_sd->s_ns; | ||
58 | sd->s_symlink.target_sd = target_sd; | ||
59 | target_sd = NULL; /* reference is now owned by the symlink */ | ||
60 | |||
61 | sysfs_addrm_start(&acxt); | ||
62 | /* Symlinks must be between directories with the same ns_type */ | ||
63 | if (!ns_type || | ||
64 | (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { | ||
65 | if (warn) | ||
66 | error = sysfs_add_one(&acxt, sd, parent_sd); | ||
67 | else | ||
68 | error = __sysfs_add_one(&acxt, sd, parent_sd); | ||
69 | } else { | ||
70 | error = -EINVAL; | ||
71 | WARN(1, KERN_WARNING | ||
72 | "sysfs: symlink across ns_types %s/%s -> %s/%s\n", | ||
73 | parent_sd->s_name, | ||
74 | sd->s_name, | ||
75 | sd->s_symlink.target_sd->s_parent->s_name, | ||
76 | sd->s_symlink.target_sd->s_name); | ||
77 | } | ||
78 | sysfs_addrm_finish(&acxt); | ||
79 | 46 | ||
80 | if (error) | 47 | if (!IS_ERR(kn)) |
81 | goto out_put; | 48 | return 0; |
82 | 49 | ||
83 | return 0; | 50 | if (warn && PTR_ERR(kn) == -EEXIST) |
84 | 51 | sysfs_warn_dup(parent, name); | |
85 | out_put: | 52 | return PTR_ERR(kn); |
86 | sysfs_put(target_sd); | ||
87 | sysfs_put(sd); | ||
88 | return error; | ||
89 | } | 53 | } |
90 | 54 | ||
91 | /** | 55 | /** |
92 | * sysfs_create_link_sd - create symlink to a given object. | 56 | * sysfs_create_link_sd - create symlink to a given object. |
93 | * @sd: directory we're creating the link in. | 57 | * @kn: directory we're creating the link in. |
94 | * @target: object we're pointing to. | 58 | * @target: object we're pointing to. |
95 | * @name: name of the symlink. | 59 | * @name: name of the symlink. |
96 | */ | 60 | */ |
97 | int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, | 61 | int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target, |
98 | const char *name) | 62 | const char *name) |
99 | { | 63 | { |
100 | return sysfs_do_create_link_sd(sd, target, name, 1); | 64 | return sysfs_do_create_link_sd(kn, target, name, 1); |
101 | } | 65 | } |
102 | 66 | ||
103 | static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, | 67 | static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, |
104 | const char *name, int warn) | 68 | const char *name, int warn) |
105 | { | 69 | { |
106 | struct sysfs_dirent *parent_sd = NULL; | 70 | struct kernfs_node *parent = NULL; |
107 | 71 | ||
108 | if (!kobj) | 72 | if (!kobj) |
109 | parent_sd = &sysfs_root; | 73 | parent = sysfs_root_kn; |
110 | else | 74 | else |
111 | parent_sd = kobj->sd; | 75 | parent = kobj->sd; |
112 | 76 | ||
113 | if (!parent_sd) | 77 | if (!parent) |
114 | return -EFAULT; | 78 | return -EFAULT; |
115 | 79 | ||
116 | return sysfs_do_create_link_sd(parent_sd, target, name, warn); | 80 | return sysfs_do_create_link_sd(parent, target, name, warn); |
117 | } | 81 | } |
118 | 82 | ||
119 | /** | 83 | /** |
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, | |||
164 | * sysfs_remove_dir() for details. | 128 | * sysfs_remove_dir() for details. |
165 | */ | 129 | */ |
166 | spin_lock(&sysfs_symlink_target_lock); | 130 | spin_lock(&sysfs_symlink_target_lock); |
167 | if (targ->sd && sysfs_ns_type(kobj->sd)) | 131 | if (targ->sd && kernfs_ns_enabled(kobj->sd)) |
168 | ns = targ->sd->s_ns; | 132 | ns = targ->sd->ns; |
169 | spin_unlock(&sysfs_symlink_target_lock); | 133 | spin_unlock(&sysfs_symlink_target_lock); |
170 | sysfs_hash_and_remove(kobj->sd, name, ns); | 134 | kernfs_remove_by_name_ns(kobj->sd, name, ns); |
171 | } | 135 | } |
172 | 136 | ||
173 | /** | 137 | /** |
@@ -177,14 +141,14 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, | |||
177 | */ | 141 | */ |
178 | void sysfs_remove_link(struct kobject *kobj, const char *name) | 142 | void sysfs_remove_link(struct kobject *kobj, const char *name) |
179 | { | 143 | { |
180 | struct sysfs_dirent *parent_sd = NULL; | 144 | struct kernfs_node *parent = NULL; |
181 | 145 | ||
182 | if (!kobj) | 146 | if (!kobj) |
183 | parent_sd = &sysfs_root; | 147 | parent = sysfs_root_kn; |
184 | else | 148 | else |
185 | parent_sd = kobj->sd; | 149 | parent = kobj->sd; |
186 | 150 | ||
187 | sysfs_hash_and_remove(parent_sd, name, NULL); | 151 | kernfs_remove_by_name(parent, name); |
188 | } | 152 | } |
189 | EXPORT_SYMBOL_GPL(sysfs_remove_link); | 153 | EXPORT_SYMBOL_GPL(sysfs_remove_link); |
190 | 154 | ||
@@ -201,130 +165,33 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link); | |||
201 | int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, | 165 | int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, |
202 | const char *old, const char *new, const void *new_ns) | 166 | const char *old, const char *new, const void *new_ns) |
203 | { | 167 | { |
204 | struct sysfs_dirent *parent_sd, *sd = NULL; | 168 | struct kernfs_node *parent, *kn = NULL; |
205 | const void *old_ns = NULL; | 169 | const void *old_ns = NULL; |
206 | int result; | 170 | int result; |
207 | 171 | ||
208 | if (!kobj) | 172 | if (!kobj) |
209 | parent_sd = &sysfs_root; | 173 | parent = sysfs_root_kn; |
210 | else | 174 | else |
211 | parent_sd = kobj->sd; | 175 | parent = kobj->sd; |
212 | 176 | ||
213 | if (targ->sd) | 177 | if (targ->sd) |
214 | old_ns = targ->sd->s_ns; | 178 | old_ns = targ->sd->ns; |
215 | 179 | ||
216 | result = -ENOENT; | 180 | result = -ENOENT; |
217 | sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); | 181 | kn = kernfs_find_and_get_ns(parent, old, old_ns); |
218 | if (!sd) | 182 | if (!kn) |
219 | goto out; | 183 | goto out; |
220 | 184 | ||
221 | result = -EINVAL; | 185 | result = -EINVAL; |
222 | if (sysfs_type(sd) != SYSFS_KOBJ_LINK) | 186 | if (kernfs_type(kn) != KERNFS_LINK) |
223 | goto out; | 187 | goto out; |
224 | if (sd->s_symlink.target_sd->s_dir.kobj != targ) | 188 | if (kn->symlink.target_kn->priv != targ) |
225 | goto out; | 189 | goto out; |
226 | 190 | ||
227 | result = sysfs_rename(sd, parent_sd, new, new_ns); | 191 | result = kernfs_rename_ns(kn, parent, new, new_ns); |
228 | 192 | ||
229 | out: | 193 | out: |
230 | sysfs_put(sd); | 194 | kernfs_put(kn); |
231 | return result; | 195 | return result; |
232 | } | 196 | } |
233 | EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); | 197 | EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); |
234 | |||
235 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, | ||
236 | struct sysfs_dirent *target_sd, char *path) | ||
237 | { | ||
238 | struct sysfs_dirent *base, *sd; | ||
239 | char *s = path; | ||
240 | int len = 0; | ||
241 | |||
242 | /* go up to the root, stop at the base */ | ||
243 | base = parent_sd; | ||
244 | while (base->s_parent) { | ||
245 | sd = target_sd->s_parent; | ||
246 | while (sd->s_parent && base != sd) | ||
247 | sd = sd->s_parent; | ||
248 | |||
249 | if (base == sd) | ||
250 | break; | ||
251 | |||
252 | strcpy(s, "../"); | ||
253 | s += 3; | ||
254 | base = base->s_parent; | ||
255 | } | ||
256 | |||
257 | /* determine end of target string for reverse fillup */ | ||
258 | sd = target_sd; | ||
259 | while (sd->s_parent && sd != base) { | ||
260 | len += strlen(sd->s_name) + 1; | ||
261 | sd = sd->s_parent; | ||
262 | } | ||
263 | |||
264 | /* check limits */ | ||
265 | if (len < 2) | ||
266 | return -EINVAL; | ||
267 | len--; | ||
268 | if ((s - path) + len > PATH_MAX) | ||
269 | return -ENAMETOOLONG; | ||
270 | |||
271 | /* reverse fillup of target string from target to base */ | ||
272 | sd = target_sd; | ||
273 | while (sd->s_parent && sd != base) { | ||
274 | int slen = strlen(sd->s_name); | ||
275 | |||
276 | len -= slen; | ||
277 | strncpy(s + len, sd->s_name, slen); | ||
278 | if (len) | ||
279 | s[--len] = '/'; | ||
280 | |||
281 | sd = sd->s_parent; | ||
282 | } | ||
283 | |||
284 | return 0; | ||
285 | } | ||
286 | |||
287 | static int sysfs_getlink(struct dentry *dentry, char *path) | ||
288 | { | ||
289 | struct sysfs_dirent *sd = dentry->d_fsdata; | ||
290 | struct sysfs_dirent *parent_sd = sd->s_parent; | ||
291 | struct sysfs_dirent *target_sd = sd->s_symlink.target_sd; | ||
292 | int error; | ||
293 | |||
294 | mutex_lock(&sysfs_mutex); | ||
295 | error = sysfs_get_target_path(parent_sd, target_sd, path); | ||
296 | mutex_unlock(&sysfs_mutex); | ||
297 | |||
298 | return error; | ||
299 | } | ||
300 | |||
301 | static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
302 | { | ||
303 | int error = -ENOMEM; | ||
304 | unsigned long page = get_zeroed_page(GFP_KERNEL); | ||
305 | if (page) { | ||
306 | error = sysfs_getlink(dentry, (char *) page); | ||
307 | if (error < 0) | ||
308 | free_page((unsigned long)page); | ||
309 | } | ||
310 | nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); | ||
311 | return NULL; | ||
312 | } | ||
313 | |||
314 | static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, | ||
315 | void *cookie) | ||
316 | { | ||
317 | char *page = nd_get_link(nd); | ||
318 | if (!IS_ERR(page)) | ||
319 | free_page((unsigned long)page); | ||
320 | } | ||
321 | |||
322 | const struct inode_operations sysfs_symlink_inode_operations = { | ||
323 | .setxattr = sysfs_setxattr, | ||
324 | .readlink = generic_readlink, | ||
325 | .follow_link = sysfs_follow_link, | ||
326 | .put_link = sysfs_put_link, | ||
327 | .setattr = sysfs_setattr, | ||
328 | .getattr = sysfs_getattr, | ||
329 | .permission = sysfs_permission, | ||
330 | }; | ||
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 0af09fbfb3f6..0e2f1cccb812 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -8,248 +8,36 @@ | |||
8 | * This file is released under the GPLv2. | 8 | * This file is released under the GPLv2. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/lockdep.h> | 11 | #ifndef __SYSFS_INTERNAL_H |
12 | #include <linux/kobject_ns.h> | 12 | #define __SYSFS_INTERNAL_H |
13 | #include <linux/fs.h> | ||
14 | #include <linux/rbtree.h> | ||
15 | 13 | ||
16 | struct sysfs_open_dirent; | 14 | #include <linux/sysfs.h> |
17 | |||
18 | /* type-specific structures for sysfs_dirent->s_* union members */ | ||
19 | struct sysfs_elem_dir { | ||
20 | struct kobject *kobj; | ||
21 | |||
22 | unsigned long subdirs; | ||
23 | /* children rbtree starts here and goes through sd->s_rb */ | ||
24 | struct rb_root children; | ||
25 | }; | ||
26 | |||
27 | struct sysfs_elem_symlink { | ||
28 | struct sysfs_dirent *target_sd; | ||
29 | }; | ||
30 | |||
31 | struct sysfs_elem_attr { | ||
32 | union { | ||
33 | struct attribute *attr; | ||
34 | struct bin_attribute *bin_attr; | ||
35 | }; | ||
36 | struct sysfs_open_dirent *open; | ||
37 | }; | ||
38 | |||
39 | struct sysfs_inode_attrs { | ||
40 | struct iattr ia_iattr; | ||
41 | void *ia_secdata; | ||
42 | u32 ia_secdata_len; | ||
43 | }; | ||
44 | |||
45 | /* | ||
46 | * sysfs_dirent - the building block of sysfs hierarchy. Each and | ||
47 | * every sysfs node is represented by single sysfs_dirent. | ||
48 | * | ||
49 | * As long as s_count reference is held, the sysfs_dirent itself is | ||
50 | * accessible. Dereferencing s_elem or any other outer entity | ||
51 | * requires s_active reference. | ||
52 | */ | ||
53 | struct sysfs_dirent { | ||
54 | atomic_t s_count; | ||
55 | atomic_t s_active; | ||
56 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
57 | struct lockdep_map dep_map; | ||
58 | #endif | ||
59 | struct sysfs_dirent *s_parent; | ||
60 | const char *s_name; | ||
61 | |||
62 | struct rb_node s_rb; | ||
63 | |||
64 | union { | ||
65 | struct completion *completion; | ||
66 | struct sysfs_dirent *removed_list; | ||
67 | } u; | ||
68 | |||
69 | const void *s_ns; /* namespace tag */ | ||
70 | unsigned int s_hash; /* ns + name hash */ | ||
71 | union { | ||
72 | struct sysfs_elem_dir s_dir; | ||
73 | struct sysfs_elem_symlink s_symlink; | ||
74 | struct sysfs_elem_attr s_attr; | ||
75 | }; | ||
76 | |||
77 | unsigned short s_flags; | ||
78 | umode_t s_mode; | ||
79 | unsigned int s_ino; | ||
80 | struct sysfs_inode_attrs *s_iattr; | ||
81 | }; | ||
82 | |||
83 | #define SD_DEACTIVATED_BIAS INT_MIN | ||
84 | |||
85 | #define SYSFS_TYPE_MASK 0x00ff | ||
86 | #define SYSFS_DIR 0x0001 | ||
87 | #define SYSFS_KOBJ_ATTR 0x0002 | ||
88 | #define SYSFS_KOBJ_BIN_ATTR 0x0004 | ||
89 | #define SYSFS_KOBJ_LINK 0x0008 | ||
90 | #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) | ||
91 | #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) | ||
92 | |||
93 | /* identify any namespace tag on sysfs_dirents */ | ||
94 | #define SYSFS_NS_TYPE_MASK 0xf00 | ||
95 | #define SYSFS_NS_TYPE_SHIFT 8 | ||
96 | |||
97 | #define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) | ||
98 | #define SYSFS_FLAG_REMOVED 0x02000 | ||
99 | |||
100 | static inline unsigned int sysfs_type(struct sysfs_dirent *sd) | ||
101 | { | ||
102 | return sd->s_flags & SYSFS_TYPE_MASK; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Return any namespace tags on this dirent. | ||
107 | * enum kobj_ns_type is defined in linux/kobject.h | ||
108 | */ | ||
109 | static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) | ||
110 | { | ||
111 | return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; | ||
112 | } | ||
113 | |||
114 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
115 | |||
116 | #define sysfs_dirent_init_lockdep(sd) \ | ||
117 | do { \ | ||
118 | struct attribute *attr = sd->s_attr.attr; \ | ||
119 | struct lock_class_key *key = attr->key; \ | ||
120 | if (!key) \ | ||
121 | key = &attr->skey; \ | ||
122 | \ | ||
123 | lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ | ||
124 | } while (0) | ||
125 | |||
126 | /* Test for attributes that want to ignore lockdep for read-locking */ | ||
127 | static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) | ||
128 | { | ||
129 | int type = sysfs_type(sd); | ||
130 | |||
131 | return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) && | ||
132 | sd->s_attr.attr->ignore_lockdep; | ||
133 | } | ||
134 | |||
135 | #else | ||
136 | |||
137 | #define sysfs_dirent_init_lockdep(sd) do {} while (0) | ||
138 | |||
139 | static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) | ||
140 | { | ||
141 | return true; | ||
142 | } | ||
143 | |||
144 | #endif | ||
145 | |||
146 | /* | ||
147 | * Context structure to be used while adding/removing nodes. | ||
148 | */ | ||
149 | struct sysfs_addrm_cxt { | ||
150 | struct sysfs_dirent *removed; | ||
151 | }; | ||
152 | 15 | ||
153 | /* | 16 | /* |
154 | * mount.c | 17 | * mount.c |
155 | */ | 18 | */ |
156 | 19 | extern struct kernfs_node *sysfs_root_kn; | |
157 | /* | ||
158 | * Each sb is associated with a set of namespace tags (i.e. | ||
159 | * the network namespace of the task which mounted this sysfs | ||
160 | * instance). | ||
161 | */ | ||
162 | struct sysfs_super_info { | ||
163 | void *ns[KOBJ_NS_TYPES]; | ||
164 | }; | ||
165 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) | ||
166 | extern struct sysfs_dirent sysfs_root; | ||
167 | extern struct kmem_cache *sysfs_dir_cachep; | ||
168 | 20 | ||
169 | /* | 21 | /* |
170 | * dir.c | 22 | * dir.c |
171 | */ | 23 | */ |
172 | extern struct mutex sysfs_mutex; | ||
173 | extern spinlock_t sysfs_symlink_target_lock; | 24 | extern spinlock_t sysfs_symlink_target_lock; |
174 | extern const struct dentry_operations sysfs_dentry_ops; | ||
175 | |||
176 | extern const struct file_operations sysfs_dir_operations; | ||
177 | extern const struct inode_operations sysfs_dir_inode_operations; | ||
178 | 25 | ||
179 | struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); | 26 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name); |
180 | void sysfs_put_active(struct sysfs_dirent *sd); | ||
181 | void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); | ||
182 | void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); | ||
183 | int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, | ||
184 | struct sysfs_dirent *parent_sd); | ||
185 | int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, | ||
186 | struct sysfs_dirent *parent_sd); | ||
187 | void sysfs_remove(struct sysfs_dirent *sd); | ||
188 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, | ||
189 | const void *ns); | ||
190 | void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); | ||
191 | |||
192 | struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, | ||
193 | const unsigned char *name, | ||
194 | const void *ns); | ||
195 | struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); | ||
196 | |||
197 | void release_sysfs_dirent(struct sysfs_dirent *sd); | ||
198 | |||
199 | int sysfs_create_subdir(struct kobject *kobj, const char *name, | ||
200 | struct sysfs_dirent **p_sd); | ||
201 | |||
202 | int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, | ||
203 | const char *new_name, const void *new_ns); | ||
204 | |||
205 | static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) | ||
206 | { | ||
207 | if (sd) { | ||
208 | WARN_ON(!atomic_read(&sd->s_count)); | ||
209 | atomic_inc(&sd->s_count); | ||
210 | } | ||
211 | return sd; | ||
212 | } | ||
213 | #define sysfs_get(sd) __sysfs_get(sd) | ||
214 | |||
215 | static inline void __sysfs_put(struct sysfs_dirent *sd) | ||
216 | { | ||
217 | if (sd && atomic_dec_and_test(&sd->s_count)) | ||
218 | release_sysfs_dirent(sd); | ||
219 | } | ||
220 | #define sysfs_put(sd) __sysfs_put(sd) | ||
221 | |||
222 | /* | ||
223 | * inode.c | ||
224 | */ | ||
225 | struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); | ||
226 | void sysfs_evict_inode(struct inode *inode); | ||
227 | int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); | ||
228 | int sysfs_permission(struct inode *inode, int mask); | ||
229 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); | ||
230 | int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
231 | struct kstat *stat); | ||
232 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
233 | size_t size, int flags); | ||
234 | int sysfs_inode_init(void); | ||
235 | 27 | ||
236 | /* | 28 | /* |
237 | * file.c | 29 | * file.c |
238 | */ | 30 | */ |
239 | extern const struct file_operations sysfs_file_operations; | 31 | int sysfs_add_file(struct kernfs_node *parent, |
240 | extern const struct file_operations sysfs_bin_operations; | 32 | const struct attribute *attr, bool is_bin); |
241 | 33 | int sysfs_add_file_mode_ns(struct kernfs_node *parent, | |
242 | int sysfs_add_file(struct sysfs_dirent *dir_sd, | 34 | const struct attribute *attr, bool is_bin, |
243 | const struct attribute *attr, int type); | ||
244 | |||
245 | int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, | ||
246 | const struct attribute *attr, int type, | ||
247 | umode_t amode, const void *ns); | 35 | umode_t amode, const void *ns); |
248 | void sysfs_unmap_bin_file(struct sysfs_dirent *sd); | ||
249 | 36 | ||
250 | /* | 37 | /* |
251 | * symlink.c | 38 | * symlink.c |
252 | */ | 39 | */ |
253 | extern const struct inode_operations sysfs_symlink_inode_operations; | 40 | int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target, |
254 | int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, | ||
255 | const char *name); | 41 | const char *name); |
42 | |||
43 | #endif /* __SYSFS_INTERNAL_H */ | ||
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 5f6fc17d6bc5..9737cba1357d 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -1010,6 +1010,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
1010 | else | 1010 | else |
1011 | udf_truncate_tail_extent(inode); | 1011 | udf_truncate_tail_extent(inode); |
1012 | mark_inode_dirty(inode); | 1012 | mark_inode_dirty(inode); |
1013 | up_write(&iinfo->i_data_sem); | ||
1013 | 1014 | ||
1014 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 1015 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
1015 | if (!fi) | 1016 | if (!fi) |
@@ -1023,7 +1024,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
1023 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); | 1024 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); |
1024 | if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | 1025 | if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) |
1025 | mark_inode_dirty(dir); | 1026 | mark_inode_dirty(dir); |
1026 | up_write(&iinfo->i_data_sem); | ||
1027 | if (fibh.sbh != fibh.ebh) | 1027 | if (fibh.sbh != fibh.ebh) |
1028 | brelse(fibh.ebh); | 1028 | brelse(fibh.ebh); |
1029 | brelse(fibh.sbh); | 1029 | brelse(fibh.sbh); |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 71c8c9d2b882..a26739451b53 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1217,7 +1217,7 @@ __xfs_get_blocks( | |||
1217 | lockmode = XFS_ILOCK_EXCL; | 1217 | lockmode = XFS_ILOCK_EXCL; |
1218 | xfs_ilock(ip, lockmode); | 1218 | xfs_ilock(ip, lockmode); |
1219 | } else { | 1219 | } else { |
1220 | lockmode = xfs_ilock_map_shared(ip); | 1220 | lockmode = xfs_ilock_data_map_shared(ip); |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | ASSERT(offset <= mp->m_super->s_maxbytes); | 1223 | ASSERT(offset <= mp->m_super->s_maxbytes); |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index b86127072ac3..01b6a0102fbd 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -164,6 +164,7 @@ xfs_attr_get( | |||
164 | { | 164 | { |
165 | int error; | 165 | int error; |
166 | struct xfs_name xname; | 166 | struct xfs_name xname; |
167 | uint lock_mode; | ||
167 | 168 | ||
168 | XFS_STATS_INC(xs_attr_get); | 169 | XFS_STATS_INC(xs_attr_get); |
169 | 170 | ||
@@ -174,9 +175,9 @@ xfs_attr_get( | |||
174 | if (error) | 175 | if (error) |
175 | return error; | 176 | return error; |
176 | 177 | ||
177 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 178 | lock_mode = xfs_ilock_attr_map_shared(ip); |
178 | error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); | 179 | error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); |
179 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 180 | xfs_iunlock(ip, lock_mode); |
180 | return(error); | 181 | return(error); |
181 | } | 182 | } |
182 | 183 | ||
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 2d174b128153..01db96f60cf0 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c | |||
@@ -507,17 +507,17 @@ xfs_attr_list_int( | |||
507 | { | 507 | { |
508 | int error; | 508 | int error; |
509 | xfs_inode_t *dp = context->dp; | 509 | xfs_inode_t *dp = context->dp; |
510 | uint lock_mode; | ||
510 | 511 | ||
511 | XFS_STATS_INC(xs_attr_list); | 512 | XFS_STATS_INC(xs_attr_list); |
512 | 513 | ||
513 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 514 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
514 | return EIO; | 515 | return EIO; |
515 | 516 | ||
516 | xfs_ilock(dp, XFS_ILOCK_SHARED); | ||
517 | |||
518 | /* | 517 | /* |
519 | * Decide on what work routines to call based on the inode size. | 518 | * Decide on what work routines to call based on the inode size. |
520 | */ | 519 | */ |
520 | lock_mode = xfs_ilock_attr_map_shared(dp); | ||
521 | if (!xfs_inode_hasattr(dp)) { | 521 | if (!xfs_inode_hasattr(dp)) { |
522 | error = 0; | 522 | error = 0; |
523 | } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { | 523 | } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { |
@@ -527,9 +527,7 @@ xfs_attr_list_int( | |||
527 | } else { | 527 | } else { |
528 | error = xfs_attr_node_list(context); | 528 | error = xfs_attr_node_list(context); |
529 | } | 529 | } |
530 | 530 | xfs_iunlock(dp, lock_mode); | |
531 | xfs_iunlock(dp, XFS_ILOCK_SHARED); | ||
532 | |||
533 | return error; | 531 | return error; |
534 | } | 532 | } |
535 | 533 | ||
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 739e0a52deda..5549d69ddb45 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c | |||
@@ -110,7 +110,7 @@ xfs_attr3_rmt_verify( | |||
110 | if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) | 110 | if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) |
111 | return false; | 111 | return false; |
112 | if (be32_to_cpu(rmt->rm_offset) + | 112 | if (be32_to_cpu(rmt->rm_offset) + |
113 | be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) | 113 | be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) |
114 | return false; | 114 | return false; |
115 | if (rmt->rm_owner == 0) | 115 | if (rmt->rm_owner == 0) |
116 | return false; | 116 | return false; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3ef11b22e750..152543c4ca70 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent( | |||
1635 | * blocks at the end of the file which do not start at the previous data block, | 1635 | * blocks at the end of the file which do not start at the previous data block, |
1636 | * we will try to align the new blocks at stripe unit boundaries. | 1636 | * we will try to align the new blocks at stripe unit boundaries. |
1637 | * | 1637 | * |
1638 | * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be | 1638 | * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be |
1639 | * at, or past the EOF. | 1639 | * at, or past the EOF. |
1640 | */ | 1640 | */ |
1641 | STATIC int | 1641 | STATIC int |
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof( | |||
1650 | bma->aeof = 0; | 1650 | bma->aeof = 0; |
1651 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, | 1651 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, |
1652 | &is_empty); | 1652 | &is_empty); |
1653 | if (error || is_empty) | 1653 | if (error) |
1654 | return error; | 1654 | return error; |
1655 | 1655 | ||
1656 | if (is_empty) { | ||
1657 | bma->aeof = 1; | ||
1658 | return 0; | ||
1659 | } | ||
1660 | |||
1656 | /* | 1661 | /* |
1657 | * Check if we are allocation or past the last extent, or at least into | 1662 | * Check if we are allocation or past the last extent, or at least into |
1658 | * the last delayed allocated extent. | 1663 | * the last delayed allocated extent. |
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc( | |||
3643 | int isaligned; | 3648 | int isaligned; |
3644 | int tryagain; | 3649 | int tryagain; |
3645 | int error; | 3650 | int error; |
3651 | int stripe_align; | ||
3646 | 3652 | ||
3647 | ASSERT(ap->length); | 3653 | ASSERT(ap->length); |
3648 | 3654 | ||
3649 | mp = ap->ip->i_mount; | 3655 | mp = ap->ip->i_mount; |
3656 | |||
3657 | /* stripe alignment for allocation is determined by mount parameters */ | ||
3658 | stripe_align = 0; | ||
3659 | if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) | ||
3660 | stripe_align = mp->m_swidth; | ||
3661 | else if (mp->m_dalign) | ||
3662 | stripe_align = mp->m_dalign; | ||
3663 | |||
3650 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; | 3664 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; |
3651 | if (unlikely(align)) { | 3665 | if (unlikely(align)) { |
3652 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, | 3666 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc( | |||
3655 | ASSERT(!error); | 3669 | ASSERT(!error); |
3656 | ASSERT(ap->length); | 3670 | ASSERT(ap->length); |
3657 | } | 3671 | } |
3672 | |||
3673 | |||
3658 | nullfb = *ap->firstblock == NULLFSBLOCK; | 3674 | nullfb = *ap->firstblock == NULLFSBLOCK; |
3659 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); | 3675 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); |
3660 | if (nullfb) { | 3676 | if (nullfb) { |
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc( | |||
3730 | */ | 3746 | */ |
3731 | if (!ap->flist->xbf_low && ap->aeof) { | 3747 | if (!ap->flist->xbf_low && ap->aeof) { |
3732 | if (!ap->offset) { | 3748 | if (!ap->offset) { |
3733 | args.alignment = mp->m_dalign; | 3749 | args.alignment = stripe_align; |
3734 | atype = args.type; | 3750 | atype = args.type; |
3735 | isaligned = 1; | 3751 | isaligned = 1; |
3736 | /* | 3752 | /* |
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc( | |||
3755 | * of minlen+alignment+slop doesn't go up | 3771 | * of minlen+alignment+slop doesn't go up |
3756 | * between the calls. | 3772 | * between the calls. |
3757 | */ | 3773 | */ |
3758 | if (blen > mp->m_dalign && blen <= args.maxlen) | 3774 | if (blen > stripe_align && blen <= args.maxlen) |
3759 | nextminlen = blen - mp->m_dalign; | 3775 | nextminlen = blen - stripe_align; |
3760 | else | 3776 | else |
3761 | nextminlen = args.minlen; | 3777 | nextminlen = args.minlen; |
3762 | if (nextminlen + mp->m_dalign > args.minlen + 1) | 3778 | if (nextminlen + stripe_align > args.minlen + 1) |
3763 | args.minalignslop = | 3779 | args.minalignslop = |
3764 | nextminlen + mp->m_dalign - | 3780 | nextminlen + stripe_align - |
3765 | args.minlen - 1; | 3781 | args.minlen - 1; |
3766 | else | 3782 | else |
3767 | args.minalignslop = 0; | 3783 | args.minalignslop = 0; |
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc( | |||
3783 | */ | 3799 | */ |
3784 | args.type = atype; | 3800 | args.type = atype; |
3785 | args.fsbno = ap->blkno; | 3801 | args.fsbno = ap->blkno; |
3786 | args.alignment = mp->m_dalign; | 3802 | args.alignment = stripe_align; |
3787 | args.minlen = nextminlen; | 3803 | args.minlen = nextminlen; |
3788 | args.minalignslop = 0; | 3804 | args.minalignslop = 0; |
3789 | isaligned = 1; | 3805 | isaligned = 1; |
@@ -3997,6 +4013,7 @@ xfs_bmapi_read( | |||
3997 | ASSERT(*nmap >= 1); | 4013 | ASSERT(*nmap >= 1); |
3998 | ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| | 4014 | ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| |
3999 | XFS_BMAPI_IGSTATE))); | 4015 | XFS_BMAPI_IGSTATE))); |
4016 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); | ||
4000 | 4017 | ||
4001 | if (unlikely(XFS_TEST_ERROR( | 4018 | if (unlikely(XFS_TEST_ERROR( |
4002 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 4019 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
@@ -4191,6 +4208,7 @@ xfs_bmapi_delay( | |||
4191 | ASSERT(*nmap >= 1); | 4208 | ASSERT(*nmap >= 1); |
4192 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | 4209 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); |
4193 | ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); | 4210 | ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); |
4211 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
4194 | 4212 | ||
4195 | if (unlikely(XFS_TEST_ERROR( | 4213 | if (unlikely(XFS_TEST_ERROR( |
4196 | (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && | 4214 | (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && |
@@ -4484,6 +4502,7 @@ xfs_bmapi_write( | |||
4484 | ASSERT(tp != NULL); | 4502 | ASSERT(tp != NULL); |
4485 | ASSERT(len > 0); | 4503 | ASSERT(len > 0); |
4486 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); | 4504 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); |
4505 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
4487 | 4506 | ||
4488 | if (unlikely(XFS_TEST_ERROR( | 4507 | if (unlikely(XFS_TEST_ERROR( |
4489 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 4508 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
@@ -5035,6 +5054,7 @@ xfs_bunmapi( | |||
5035 | if (XFS_FORCED_SHUTDOWN(mp)) | 5054 | if (XFS_FORCED_SHUTDOWN(mp)) |
5036 | return XFS_ERROR(EIO); | 5055 | return XFS_ERROR(EIO); |
5037 | 5056 | ||
5057 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
5038 | ASSERT(len > 0); | 5058 | ASSERT(len > 0); |
5039 | ASSERT(nexts >= 0); | 5059 | ASSERT(nexts >= 0); |
5040 | 5060 | ||
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c0323..f264616080ca 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -287,6 +287,7 @@ xfs_bmapi_allocate( | |||
287 | INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); | 287 | INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); |
288 | queue_work(xfs_alloc_wq, &args->work); | 288 | queue_work(xfs_alloc_wq, &args->work); |
289 | wait_for_completion(&done); | 289 | wait_for_completion(&done); |
290 | destroy_work_on_stack(&args->work); | ||
290 | return args->result; | 291 | return args->result; |
291 | } | 292 | } |
292 | 293 | ||
@@ -617,22 +618,27 @@ xfs_getbmap( | |||
617 | return XFS_ERROR(ENOMEM); | 618 | return XFS_ERROR(ENOMEM); |
618 | 619 | ||
619 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 620 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
620 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { | 621 | if (whichfork == XFS_DATA_FORK) { |
621 | if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { | 622 | if (!(iflags & BMV_IF_DELALLOC) && |
623 | (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { | ||
622 | error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); | 624 | error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); |
623 | if (error) | 625 | if (error) |
624 | goto out_unlock_iolock; | 626 | goto out_unlock_iolock; |
627 | |||
628 | /* | ||
629 | * Even after flushing the inode, there can still be | ||
630 | * delalloc blocks on the inode beyond EOF due to | ||
631 | * speculative preallocation. These are not removed | ||
632 | * until the release function is called or the inode | ||
633 | * is inactivated. Hence we cannot assert here that | ||
634 | * ip->i_delayed_blks == 0. | ||
635 | */ | ||
625 | } | 636 | } |
626 | /* | ||
627 | * even after flushing the inode, there can still be delalloc | ||
628 | * blocks on the inode beyond EOF due to speculative | ||
629 | * preallocation. These are not removed until the release | ||
630 | * function is called or the inode is inactivated. Hence we | ||
631 | * cannot assert here that ip->i_delayed_blks == 0. | ||
632 | */ | ||
633 | } | ||
634 | 637 | ||
635 | lock = xfs_ilock_map_shared(ip); | 638 | lock = xfs_ilock_data_map_shared(ip); |
639 | } else { | ||
640 | lock = xfs_ilock_attr_map_shared(ip); | ||
641 | } | ||
636 | 642 | ||
637 | /* | 643 | /* |
638 | * Don't let nex be bigger than the number of extents | 644 | * Don't let nex be bigger than the number of extents |
@@ -737,7 +743,7 @@ xfs_getbmap( | |||
737 | out_free_map: | 743 | out_free_map: |
738 | kmem_free(map); | 744 | kmem_free(map); |
739 | out_unlock_ilock: | 745 | out_unlock_ilock: |
740 | xfs_iunlock_map_shared(ip, lock); | 746 | xfs_iunlock(ip, lock); |
741 | out_unlock_iolock: | 747 | out_unlock_iolock: |
742 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 748 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
743 | 749 | ||
@@ -1168,9 +1174,15 @@ xfs_zero_remaining_bytes( | |||
1168 | xfs_buf_unlock(bp); | 1174 | xfs_buf_unlock(bp); |
1169 | 1175 | ||
1170 | for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { | 1176 | for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { |
1177 | uint lock_mode; | ||
1178 | |||
1171 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 1179 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
1172 | nimap = 1; | 1180 | nimap = 1; |
1181 | |||
1182 | lock_mode = xfs_ilock_data_map_shared(ip); | ||
1173 | error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); | 1183 | error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); |
1184 | xfs_iunlock(ip, lock_mode); | ||
1185 | |||
1174 | if (error || nimap < 1) | 1186 | if (error || nimap < 1) |
1175 | break; | 1187 | break; |
1176 | ASSERT(imap.br_blockcount >= 1); | 1188 | ASSERT(imap.br_blockcount >= 1); |
@@ -1187,7 +1199,12 @@ xfs_zero_remaining_bytes( | |||
1187 | XFS_BUF_UNWRITE(bp); | 1199 | XFS_BUF_UNWRITE(bp); |
1188 | XFS_BUF_READ(bp); | 1200 | XFS_BUF_READ(bp); |
1189 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); | 1201 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); |
1190 | xfsbdstrat(mp, bp); | 1202 | |
1203 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1204 | error = XFS_ERROR(EIO); | ||
1205 | break; | ||
1206 | } | ||
1207 | xfs_buf_iorequest(bp); | ||
1191 | error = xfs_buf_iowait(bp); | 1208 | error = xfs_buf_iowait(bp); |
1192 | if (error) { | 1209 | if (error) { |
1193 | xfs_buf_ioerror_alert(bp, | 1210 | xfs_buf_ioerror_alert(bp, |
@@ -1200,7 +1217,12 @@ xfs_zero_remaining_bytes( | |||
1200 | XFS_BUF_UNDONE(bp); | 1217 | XFS_BUF_UNDONE(bp); |
1201 | XFS_BUF_UNREAD(bp); | 1218 | XFS_BUF_UNREAD(bp); |
1202 | XFS_BUF_WRITE(bp); | 1219 | XFS_BUF_WRITE(bp); |
1203 | xfsbdstrat(mp, bp); | 1220 | |
1221 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1222 | error = XFS_ERROR(EIO); | ||
1223 | break; | ||
1224 | } | ||
1225 | xfs_buf_iorequest(bp); | ||
1204 | error = xfs_buf_iowait(bp); | 1226 | error = xfs_buf_iowait(bp); |
1205 | if (error) { | 1227 | if (error) { |
1206 | xfs_buf_ioerror_alert(bp, | 1228 | xfs_buf_ioerror_alert(bp, |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77dcb00..9fccfb594291 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -698,7 +698,11 @@ xfs_buf_read_uncached( | |||
698 | bp->b_flags |= XBF_READ; | 698 | bp->b_flags |= XBF_READ; |
699 | bp->b_ops = ops; | 699 | bp->b_ops = ops; |
700 | 700 | ||
701 | xfsbdstrat(target->bt_mount, bp); | 701 | if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { |
702 | xfs_buf_relse(bp); | ||
703 | return NULL; | ||
704 | } | ||
705 | xfs_buf_iorequest(bp); | ||
702 | xfs_buf_iowait(bp); | 706 | xfs_buf_iowait(bp); |
703 | return bp; | 707 | return bp; |
704 | } | 708 | } |
@@ -1089,7 +1093,7 @@ xfs_bioerror( | |||
1089 | * This is meant for userdata errors; metadata bufs come with | 1093 | * This is meant for userdata errors; metadata bufs come with |
1090 | * iodone functions attached, so that we can track down errors. | 1094 | * iodone functions attached, so that we can track down errors. |
1091 | */ | 1095 | */ |
1092 | STATIC int | 1096 | int |
1093 | xfs_bioerror_relse( | 1097 | xfs_bioerror_relse( |
1094 | struct xfs_buf *bp) | 1098 | struct xfs_buf *bp) |
1095 | { | 1099 | { |
@@ -1152,7 +1156,7 @@ xfs_bwrite( | |||
1152 | ASSERT(xfs_buf_islocked(bp)); | 1156 | ASSERT(xfs_buf_islocked(bp)); |
1153 | 1157 | ||
1154 | bp->b_flags |= XBF_WRITE; | 1158 | bp->b_flags |= XBF_WRITE; |
1155 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); | 1159 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); |
1156 | 1160 | ||
1157 | xfs_bdstrat_cb(bp); | 1161 | xfs_bdstrat_cb(bp); |
1158 | 1162 | ||
@@ -1164,25 +1168,6 @@ xfs_bwrite( | |||
1164 | return error; | 1168 | return error; |
1165 | } | 1169 | } |
1166 | 1170 | ||
1167 | /* | ||
1168 | * Wrapper around bdstrat so that we can stop data from going to disk in case | ||
1169 | * we are shutting down the filesystem. Typically user data goes thru this | ||
1170 | * path; one of the exceptions is the superblock. | ||
1171 | */ | ||
1172 | void | ||
1173 | xfsbdstrat( | ||
1174 | struct xfs_mount *mp, | ||
1175 | struct xfs_buf *bp) | ||
1176 | { | ||
1177 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1178 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
1179 | xfs_bioerror_relse(bp); | ||
1180 | return; | ||
1181 | } | ||
1182 | |||
1183 | xfs_buf_iorequest(bp); | ||
1184 | } | ||
1185 | |||
1186 | STATIC void | 1171 | STATIC void |
1187 | _xfs_buf_ioend( | 1172 | _xfs_buf_ioend( |
1188 | xfs_buf_t *bp, | 1173 | xfs_buf_t *bp, |
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg( | |||
1516 | struct xfs_buf *bp; | 1501 | struct xfs_buf *bp; |
1517 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | 1502 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
1518 | list_del_init(&bp->b_lru); | 1503 | list_del_init(&bp->b_lru); |
1504 | if (bp->b_flags & XBF_WRITE_FAIL) { | ||
1505 | xfs_alert(btp->bt_mount, | ||
1506 | "Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" | ||
1507 | "Please run xfs_repair to determine the extent of the problem.", | ||
1508 | (long long)bp->b_bn); | ||
1509 | } | ||
1519 | xfs_buf_rele(bp); | 1510 | xfs_buf_rele(bp); |
1520 | } | 1511 | } |
1521 | if (loop++ != 0) | 1512 | if (loop++ != 0) |
@@ -1602,12 +1593,11 @@ xfs_free_buftarg( | |||
1602 | kmem_free(btp); | 1593 | kmem_free(btp); |
1603 | } | 1594 | } |
1604 | 1595 | ||
1605 | STATIC int | 1596 | int |
1606 | xfs_setsize_buftarg_flags( | 1597 | xfs_setsize_buftarg( |
1607 | xfs_buftarg_t *btp, | 1598 | xfs_buftarg_t *btp, |
1608 | unsigned int blocksize, | 1599 | unsigned int blocksize, |
1609 | unsigned int sectorsize, | 1600 | unsigned int sectorsize) |
1610 | int verbose) | ||
1611 | { | 1601 | { |
1612 | btp->bt_bsize = blocksize; | 1602 | btp->bt_bsize = blocksize; |
1613 | btp->bt_sshift = ffs(sectorsize) - 1; | 1603 | btp->bt_sshift = ffs(sectorsize) - 1; |
@@ -1628,26 +1618,17 @@ xfs_setsize_buftarg_flags( | |||
1628 | } | 1618 | } |
1629 | 1619 | ||
1630 | /* | 1620 | /* |
1631 | * When allocating the initial buffer target we have not yet | 1621 | * When allocating the initial buffer target we have not yet |
1632 | * read in the superblock, so don't know what sized sectors | 1622 | * read in the superblock, so don't know what sized sectors |
1633 | * are being used at this early stage. Play safe. | 1623 | * are being used at this early stage. Play safe. |
1634 | */ | 1624 | */ |
1635 | STATIC int | 1625 | STATIC int |
1636 | xfs_setsize_buftarg_early( | 1626 | xfs_setsize_buftarg_early( |
1637 | xfs_buftarg_t *btp, | 1627 | xfs_buftarg_t *btp, |
1638 | struct block_device *bdev) | 1628 | struct block_device *bdev) |
1639 | { | 1629 | { |
1640 | return xfs_setsize_buftarg_flags(btp, | 1630 | return xfs_setsize_buftarg(btp, PAGE_SIZE, |
1641 | PAGE_SIZE, bdev_logical_block_size(bdev), 0); | 1631 | bdev_logical_block_size(bdev)); |
1642 | } | ||
1643 | |||
1644 | int | ||
1645 | xfs_setsize_buftarg( | ||
1646 | xfs_buftarg_t *btp, | ||
1647 | unsigned int blocksize, | ||
1648 | unsigned int sectorsize) | ||
1649 | { | ||
1650 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); | ||
1651 | } | 1632 | } |
1652 | 1633 | ||
1653 | xfs_buftarg_t * | 1634 | xfs_buftarg_t * |
@@ -1799,7 +1780,7 @@ __xfs_buf_delwri_submit( | |||
1799 | 1780 | ||
1800 | blk_start_plug(&plug); | 1781 | blk_start_plug(&plug); |
1801 | list_for_each_entry_safe(bp, n, io_list, b_list) { | 1782 | list_for_each_entry_safe(bp, n, io_list, b_list) { |
1802 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); | 1783 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); |
1803 | bp->b_flags |= XBF_WRITE; | 1784 | bp->b_flags |= XBF_WRITE; |
1804 | 1785 | ||
1805 | if (!wait) { | 1786 | if (!wait) { |
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e65683361017..1cf21a4a9f22 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -45,6 +45,7 @@ typedef enum { | |||
45 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 45 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
46 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 46 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
47 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ | 47 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
48 | #define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ | ||
48 | 49 | ||
49 | /* I/O hints for the BIO layer */ | 50 | /* I/O hints for the BIO layer */ |
50 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 51 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
70 | { XBF_ASYNC, "ASYNC" }, \ | 71 | { XBF_ASYNC, "ASYNC" }, \ |
71 | { XBF_DONE, "DONE" }, \ | 72 | { XBF_DONE, "DONE" }, \ |
72 | { XBF_STALE, "STALE" }, \ | 73 | { XBF_STALE, "STALE" }, \ |
74 | { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ | ||
73 | { XBF_SYNCIO, "SYNCIO" }, \ | 75 | { XBF_SYNCIO, "SYNCIO" }, \ |
74 | { XBF_FUA, "FUA" }, \ | 76 | { XBF_FUA, "FUA" }, \ |
75 | { XBF_FLUSH, "FLUSH" }, \ | 77 | { XBF_FLUSH, "FLUSH" }, \ |
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 82 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
81 | { _XBF_COMPOUND, "COMPOUND" } | 83 | { _XBF_COMPOUND, "COMPOUND" } |
82 | 84 | ||
85 | |||
83 | /* | 86 | /* |
84 | * Internal state flags. | 87 | * Internal state flags. |
85 | */ | 88 | */ |
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); | |||
269 | 272 | ||
270 | /* Buffer Read and Write Routines */ | 273 | /* Buffer Read and Write Routines */ |
271 | extern int xfs_bwrite(struct xfs_buf *bp); | 274 | extern int xfs_bwrite(struct xfs_buf *bp); |
272 | |||
273 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | ||
274 | |||
275 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 275 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
276 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 276 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
277 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | 277 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); |
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | |||
282 | #define xfs_buf_zero(bp, off, len) \ | 282 | #define xfs_buf_zero(bp, off, len) \ |
283 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | 283 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) |
284 | 284 | ||
285 | extern int xfs_bioerror_relse(struct xfs_buf *); | ||
286 | |||
285 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 287 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
286 | { | 288 | { |
287 | return bp ? bp->b_error : ENOMEM; | 289 | return bp ? bp->b_error : ENOMEM; |
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void); | |||
301 | 303 | ||
302 | #define XFS_BUF_ZEROFLAGS(bp) \ | 304 | #define XFS_BUF_ZEROFLAGS(bp) \ |
303 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ | 305 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ |
304 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 306 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \ |
307 | XBF_WRITE_FAIL)) | ||
305 | 308 | ||
306 | void xfs_buf_stale(struct xfs_buf *bp); | 309 | void xfs_buf_stale(struct xfs_buf *bp); |
307 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 310 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67ba25d3..33149113e333 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -182,21 +182,47 @@ xfs_buf_item_size( | |||
182 | trace_xfs_buf_item_size(bip); | 182 | trace_xfs_buf_item_size(bip); |
183 | } | 183 | } |
184 | 184 | ||
185 | static struct xfs_log_iovec * | 185 | static inline void |
186 | xfs_buf_item_copy_iovec( | ||
187 | struct xfs_log_vec *lv, | ||
188 | struct xfs_log_iovec **vecp, | ||
189 | struct xfs_buf *bp, | ||
190 | uint offset, | ||
191 | int first_bit, | ||
192 | uint nbits) | ||
193 | { | ||
194 | offset += first_bit * XFS_BLF_CHUNK; | ||
195 | xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, | ||
196 | xfs_buf_offset(bp, offset), | ||
197 | nbits * XFS_BLF_CHUNK); | ||
198 | } | ||
199 | |||
200 | static inline bool | ||
201 | xfs_buf_item_straddle( | ||
202 | struct xfs_buf *bp, | ||
203 | uint offset, | ||
204 | int next_bit, | ||
205 | int last_bit) | ||
206 | { | ||
207 | return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) != | ||
208 | (xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) + | ||
209 | XFS_BLF_CHUNK); | ||
210 | } | ||
211 | |||
212 | static void | ||
186 | xfs_buf_item_format_segment( | 213 | xfs_buf_item_format_segment( |
187 | struct xfs_buf_log_item *bip, | 214 | struct xfs_buf_log_item *bip, |
188 | struct xfs_log_iovec *vecp, | 215 | struct xfs_log_vec *lv, |
216 | struct xfs_log_iovec **vecp, | ||
189 | uint offset, | 217 | uint offset, |
190 | struct xfs_buf_log_format *blfp) | 218 | struct xfs_buf_log_format *blfp) |
191 | { | 219 | { |
192 | struct xfs_buf *bp = bip->bli_buf; | 220 | struct xfs_buf *bp = bip->bli_buf; |
193 | uint base_size; | 221 | uint base_size; |
194 | uint nvecs; | ||
195 | int first_bit; | 222 | int first_bit; |
196 | int last_bit; | 223 | int last_bit; |
197 | int next_bit; | 224 | int next_bit; |
198 | uint nbits; | 225 | uint nbits; |
199 | uint buffer_offset; | ||
200 | 226 | ||
201 | /* copy the flags across from the base format item */ | 227 | /* copy the flags across from the base format item */ |
202 | blfp->blf_flags = bip->__bli_format.blf_flags; | 228 | blfp->blf_flags = bip->__bli_format.blf_flags; |
@@ -208,21 +234,17 @@ xfs_buf_item_format_segment( | |||
208 | */ | 234 | */ |
209 | base_size = xfs_buf_log_format_size(blfp); | 235 | base_size = xfs_buf_log_format_size(blfp); |
210 | 236 | ||
211 | nvecs = 0; | ||
212 | first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); | 237 | first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); |
213 | if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { | 238 | if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { |
214 | /* | 239 | /* |
215 | * If the map is not be dirty in the transaction, mark | 240 | * If the map is not be dirty in the transaction, mark |
216 | * the size as zero and do not advance the vector pointer. | 241 | * the size as zero and do not advance the vector pointer. |
217 | */ | 242 | */ |
218 | goto out; | 243 | return; |
219 | } | 244 | } |
220 | 245 | ||
221 | vecp->i_addr = blfp; | 246 | blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); |
222 | vecp->i_len = base_size; | 247 | blfp->blf_size = 1; |
223 | vecp->i_type = XLOG_REG_TYPE_BFORMAT; | ||
224 | vecp++; | ||
225 | nvecs = 1; | ||
226 | 248 | ||
227 | if (bip->bli_flags & XFS_BLI_STALE) { | 249 | if (bip->bli_flags & XFS_BLI_STALE) { |
228 | /* | 250 | /* |
@@ -232,14 +254,13 @@ xfs_buf_item_format_segment( | |||
232 | */ | 254 | */ |
233 | trace_xfs_buf_item_format_stale(bip); | 255 | trace_xfs_buf_item_format_stale(bip); |
234 | ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); | 256 | ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); |
235 | goto out; | 257 | return; |
236 | } | 258 | } |
237 | 259 | ||
238 | 260 | ||
239 | /* | 261 | /* |
240 | * Fill in an iovec for each set of contiguous chunks. | 262 | * Fill in an iovec for each set of contiguous chunks. |
241 | */ | 263 | */ |
242 | |||
243 | last_bit = first_bit; | 264 | last_bit = first_bit; |
244 | nbits = 1; | 265 | nbits = 1; |
245 | for (;;) { | 266 | for (;;) { |
@@ -252,42 +273,22 @@ xfs_buf_item_format_segment( | |||
252 | next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, | 273 | next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, |
253 | (uint)last_bit + 1); | 274 | (uint)last_bit + 1); |
254 | /* | 275 | /* |
255 | * If we run out of bits fill in the last iovec and get | 276 | * If we run out of bits fill in the last iovec and get out of |
256 | * out of the loop. | 277 | * the loop. Else if we start a new set of bits then fill in |
257 | * Else if we start a new set of bits then fill in the | 278 | * the iovec for the series we were looking at and start |
258 | * iovec for the series we were looking at and start | 279 | * counting the bits in the new one. Else we're still in the |
259 | * counting the bits in the new one. | 280 | * same set of bits so just keep counting and scanning. |
260 | * Else we're still in the same set of bits so just | ||
261 | * keep counting and scanning. | ||
262 | */ | 281 | */ |
263 | if (next_bit == -1) { | 282 | if (next_bit == -1) { |
264 | buffer_offset = offset + first_bit * XFS_BLF_CHUNK; | 283 | xfs_buf_item_copy_iovec(lv, vecp, bp, offset, |
265 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 284 | first_bit, nbits); |
266 | vecp->i_len = nbits * XFS_BLF_CHUNK; | 285 | blfp->blf_size++; |
267 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | ||
268 | nvecs++; | ||
269 | break; | 286 | break; |
270 | } else if (next_bit != last_bit + 1) { | 287 | } else if (next_bit != last_bit + 1 || |
271 | buffer_offset = offset + first_bit * XFS_BLF_CHUNK; | 288 | xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { |
272 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | 289 | xfs_buf_item_copy_iovec(lv, vecp, bp, offset, |
273 | vecp->i_len = nbits * XFS_BLF_CHUNK; | 290 | first_bit, nbits); |
274 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | 291 | blfp->blf_size++; |
275 | nvecs++; | ||
276 | vecp++; | ||
277 | first_bit = next_bit; | ||
278 | last_bit = next_bit; | ||
279 | nbits = 1; | ||
280 | } else if (xfs_buf_offset(bp, offset + | ||
281 | (next_bit << XFS_BLF_SHIFT)) != | ||
282 | (xfs_buf_offset(bp, offset + | ||
283 | (last_bit << XFS_BLF_SHIFT)) + | ||
284 | XFS_BLF_CHUNK)) { | ||
285 | buffer_offset = offset + first_bit * XFS_BLF_CHUNK; | ||
286 | vecp->i_addr = xfs_buf_offset(bp, buffer_offset); | ||
287 | vecp->i_len = nbits * XFS_BLF_CHUNK; | ||
288 | vecp->i_type = XLOG_REG_TYPE_BCHUNK; | ||
289 | nvecs++; | ||
290 | vecp++; | ||
291 | first_bit = next_bit; | 292 | first_bit = next_bit; |
292 | last_bit = next_bit; | 293 | last_bit = next_bit; |
293 | nbits = 1; | 294 | nbits = 1; |
@@ -296,9 +297,6 @@ xfs_buf_item_format_segment( | |||
296 | nbits++; | 297 | nbits++; |
297 | } | 298 | } |
298 | } | 299 | } |
299 | out: | ||
300 | blfp->blf_size = nvecs; | ||
301 | return vecp; | ||
302 | } | 300 | } |
303 | 301 | ||
304 | /* | 302 | /* |
@@ -310,10 +308,11 @@ out: | |||
310 | STATIC void | 308 | STATIC void |
311 | xfs_buf_item_format( | 309 | xfs_buf_item_format( |
312 | struct xfs_log_item *lip, | 310 | struct xfs_log_item *lip, |
313 | struct xfs_log_iovec *vecp) | 311 | struct xfs_log_vec *lv) |
314 | { | 312 | { |
315 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 313 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
316 | struct xfs_buf *bp = bip->bli_buf; | 314 | struct xfs_buf *bp = bip->bli_buf; |
315 | struct xfs_log_iovec *vecp = NULL; | ||
317 | uint offset = 0; | 316 | uint offset = 0; |
318 | int i; | 317 | int i; |
319 | 318 | ||
@@ -354,8 +353,8 @@ xfs_buf_item_format( | |||
354 | } | 353 | } |
355 | 354 | ||
356 | for (i = 0; i < bip->bli_format_count; i++) { | 355 | for (i = 0; i < bip->bli_format_count; i++) { |
357 | vecp = xfs_buf_item_format_segment(bip, vecp, offset, | 356 | xfs_buf_item_format_segment(bip, lv, &vecp, offset, |
358 | &bip->bli_formats[i]); | 357 | &bip->bli_formats[i]); |
359 | offset += bp->b_maps[i].bm_len; | 358 | offset += bp->b_maps[i].bm_len; |
360 | } | 359 | } |
361 | 360 | ||
@@ -496,6 +495,14 @@ xfs_buf_item_unpin( | |||
496 | } | 495 | } |
497 | } | 496 | } |
498 | 497 | ||
498 | /* | ||
499 | * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 | ||
500 | * seconds so as to not spam logs too much on repeated detection of the same | ||
501 | * buffer being bad.. | ||
502 | */ | ||
503 | |||
504 | DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); | ||
505 | |||
499 | STATIC uint | 506 | STATIC uint |
500 | xfs_buf_item_push( | 507 | xfs_buf_item_push( |
501 | struct xfs_log_item *lip, | 508 | struct xfs_log_item *lip, |
@@ -524,6 +531,14 @@ xfs_buf_item_push( | |||
524 | 531 | ||
525 | trace_xfs_buf_item_push(bip); | 532 | trace_xfs_buf_item_push(bip); |
526 | 533 | ||
534 | /* has a previous flush failed due to IO errors? */ | ||
535 | if ((bp->b_flags & XBF_WRITE_FAIL) && | ||
536 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { | ||
537 | xfs_warn(bp->b_target->bt_mount, | ||
538 | "Detected failing async write on buffer block 0x%llx. Retrying async write.\n", | ||
539 | (long long)bp->b_bn); | ||
540 | } | ||
541 | |||
527 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | 542 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
528 | rval = XFS_ITEM_FLUSHING; | 543 | rval = XFS_ITEM_FLUSHING; |
529 | xfs_buf_unlock(bp); | 544 | xfs_buf_unlock(bp); |
@@ -1096,8 +1111,9 @@ xfs_buf_iodone_callbacks( | |||
1096 | 1111 | ||
1097 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ | 1112 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
1098 | 1113 | ||
1099 | if (!XFS_BUF_ISSTALE(bp)) { | 1114 | if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { |
1100 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; | 1115 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | |
1116 | XBF_DONE | XBF_WRITE_FAIL; | ||
1101 | xfs_buf_iorequest(bp); | 1117 | xfs_buf_iorequest(bp); |
1102 | } else { | 1118 | } else { |
1103 | xfs_buf_relse(bp); | 1119 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56369d4509d5..48c7d18f68c3 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup( | |||
2067 | */ | 2067 | */ |
2068 | int /* error */ | 2068 | int /* error */ |
2069 | xfs_dir2_node_removename( | 2069 | xfs_dir2_node_removename( |
2070 | xfs_da_args_t *args) /* operation arguments */ | 2070 | struct xfs_da_args *args) /* operation arguments */ |
2071 | { | 2071 | { |
2072 | xfs_da_state_blk_t *blk; /* leaf block */ | 2072 | struct xfs_da_state_blk *blk; /* leaf block */ |
2073 | int error; /* error return value */ | 2073 | int error; /* error return value */ |
2074 | int rval; /* operation return value */ | 2074 | int rval; /* operation return value */ |
2075 | xfs_da_state_t *state; /* btree cursor */ | 2075 | struct xfs_da_state *state; /* btree cursor */ |
2076 | 2076 | ||
2077 | trace_xfs_dir2_node_removename(args); | 2077 | trace_xfs_dir2_node_removename(args); |
2078 | 2078 | ||
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename( | |||
2084 | state->mp = args->dp->i_mount; | 2084 | state->mp = args->dp->i_mount; |
2085 | state->blocksize = state->mp->m_dirblksize; | 2085 | state->blocksize = state->mp->m_dirblksize; |
2086 | state->node_ents = state->mp->m_dir_node_ents; | 2086 | state->node_ents = state->mp->m_dir_node_ents; |
2087 | /* | 2087 | |
2088 | * Look up the entry we're deleting, set up the cursor. | 2088 | /* Look up the entry we're deleting, set up the cursor. */ |
2089 | */ | ||
2090 | error = xfs_da3_node_lookup_int(state, &rval); | 2089 | error = xfs_da3_node_lookup_int(state, &rval); |
2091 | if (error) | 2090 | if (error) |
2092 | rval = error; | 2091 | goto out_free; |
2093 | /* | 2092 | |
2094 | * Didn't find it, upper layer screwed up. | 2093 | /* Didn't find it, upper layer screwed up. */ |
2095 | */ | ||
2096 | if (rval != EEXIST) { | 2094 | if (rval != EEXIST) { |
2097 | xfs_da_state_free(state); | 2095 | error = rval; |
2098 | return rval; | 2096 | goto out_free; |
2099 | } | 2097 | } |
2098 | |||
2100 | blk = &state->path.blk[state->path.active - 1]; | 2099 | blk = &state->path.blk[state->path.active - 1]; |
2101 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | 2100 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); |
2102 | ASSERT(state->extravalid); | 2101 | ASSERT(state->extravalid); |
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename( | |||
2107 | error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, | 2106 | error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, |
2108 | &state->extrablk, &rval); | 2107 | &state->extrablk, &rval); |
2109 | if (error) | 2108 | if (error) |
2110 | return error; | 2109 | goto out_free; |
2111 | /* | 2110 | /* |
2112 | * Fix the hash values up the btree. | 2111 | * Fix the hash values up the btree. |
2113 | */ | 2112 | */ |
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename( | |||
2122 | */ | 2121 | */ |
2123 | if (!error) | 2122 | if (!error) |
2124 | error = xfs_dir2_node_to_leaf(state); | 2123 | error = xfs_dir2_node_to_leaf(state); |
2124 | out_free: | ||
2125 | xfs_da_state_free(state); | 2125 | xfs_da_state_free(state); |
2126 | return error; | 2126 | return error; |
2127 | } | 2127 | } |
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index c4e50c6ed584..aead369e1c30 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c | |||
@@ -674,6 +674,7 @@ xfs_readdir( | |||
674 | { | 674 | { |
675 | int rval; /* return value */ | 675 | int rval; /* return value */ |
676 | int v; /* type-checking value */ | 676 | int v; /* type-checking value */ |
677 | uint lock_mode; | ||
677 | 678 | ||
678 | trace_xfs_readdir(dp); | 679 | trace_xfs_readdir(dp); |
679 | 680 | ||
@@ -683,6 +684,7 @@ xfs_readdir( | |||
683 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 684 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
684 | XFS_STATS_INC(xs_dir_getdents); | 685 | XFS_STATS_INC(xs_dir_getdents); |
685 | 686 | ||
687 | lock_mode = xfs_ilock_data_map_shared(dp); | ||
686 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 688 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
687 | rval = xfs_dir2_sf_getdents(dp, ctx); | 689 | rval = xfs_dir2_sf_getdents(dp, ctx); |
688 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) | 690 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) |
@@ -691,5 +693,7 @@ xfs_readdir( | |||
691 | rval = xfs_dir2_block_getdents(dp, ctx); | 693 | rval = xfs_dir2_block_getdents(dp, ctx); |
692 | else | 694 | else |
693 | rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); | 695 | rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); |
696 | xfs_iunlock(dp, lock_mode); | ||
697 | |||
694 | return rval; | 698 | return rval; |
695 | } | 699 | } |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index aafc6e46cb58..3725fb1b902b 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -170,6 +170,7 @@ xfs_dir2_block_to_sf( | |||
170 | char *ptr; /* current data pointer */ | 170 | char *ptr; /* current data pointer */ |
171 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ | 171 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ |
172 | xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ | 172 | xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ |
173 | xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ | ||
173 | 174 | ||
174 | trace_xfs_dir2_block_to_sf(args); | 175 | trace_xfs_dir2_block_to_sf(args); |
175 | 176 | ||
@@ -177,35 +178,20 @@ xfs_dir2_block_to_sf( | |||
177 | mp = dp->i_mount; | 178 | mp = dp->i_mount; |
178 | 179 | ||
179 | /* | 180 | /* |
180 | * Make a copy of the block data, so we can shrink the inode | 181 | * allocate a temporary destination buffer the size of the inode |
181 | * and add local data. | 182 | * to format the data into. Once we have formatted the data, we |
183 | * can free the block and copy the formatted data into the inode literal | ||
184 | * area. | ||
182 | */ | 185 | */ |
183 | hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); | 186 | dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); |
184 | memcpy(hdr, bp->b_addr, mp->m_dirblksize); | 187 | hdr = bp->b_addr; |
185 | logflags = XFS_ILOG_CORE; | ||
186 | if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { | ||
187 | ASSERT(error != ENOSPC); | ||
188 | goto out; | ||
189 | } | ||
190 | 188 | ||
191 | /* | 189 | /* |
192 | * The buffer is now unconditionally gone, whether | ||
193 | * xfs_dir2_shrink_inode worked or not. | ||
194 | * | ||
195 | * Convert the inode to local format. | ||
196 | */ | ||
197 | dp->i_df.if_flags &= ~XFS_IFEXTENTS; | ||
198 | dp->i_df.if_flags |= XFS_IFINLINE; | ||
199 | dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; | ||
200 | ASSERT(dp->i_df.if_bytes == 0); | ||
201 | xfs_idata_realloc(dp, size, XFS_DATA_FORK); | ||
202 | logflags |= XFS_ILOG_DDATA; | ||
203 | /* | ||
204 | * Copy the header into the newly allocate local space. | 190 | * Copy the header into the newly allocate local space. |
205 | */ | 191 | */ |
206 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 192 | sfp = (xfs_dir2_sf_hdr_t *)dst; |
207 | memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); | 193 | memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); |
208 | dp->i_d.di_size = size; | 194 | |
209 | /* | 195 | /* |
210 | * Set up to loop over the block's entries. | 196 | * Set up to loop over the block's entries. |
211 | */ | 197 | */ |
@@ -258,10 +244,34 @@ xfs_dir2_block_to_sf( | |||
258 | ptr += dp->d_ops->data_entsize(dep->namelen); | 244 | ptr += dp->d_ops->data_entsize(dep->namelen); |
259 | } | 245 | } |
260 | ASSERT((char *)sfep - (char *)sfp == size); | 246 | ASSERT((char *)sfep - (char *)sfp == size); |
247 | |||
248 | /* now we are done with the block, we can shrink the inode */ | ||
249 | logflags = XFS_ILOG_CORE; | ||
250 | error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp); | ||
251 | if (error) { | ||
252 | ASSERT(error != ENOSPC); | ||
253 | goto out; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * The buffer is now unconditionally gone, whether | ||
258 | * xfs_dir2_shrink_inode worked or not. | ||
259 | * | ||
260 | * Convert the inode to local format and copy the data in. | ||
261 | */ | ||
262 | dp->i_df.if_flags &= ~XFS_IFEXTENTS; | ||
263 | dp->i_df.if_flags |= XFS_IFINLINE; | ||
264 | dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; | ||
265 | ASSERT(dp->i_df.if_bytes == 0); | ||
266 | xfs_idata_realloc(dp, size, XFS_DATA_FORK); | ||
267 | |||
268 | logflags |= XFS_ILOG_DDATA; | ||
269 | memcpy(dp->i_df.if_u1.if_data, dst, size); | ||
270 | dp->i_d.di_size = size; | ||
261 | xfs_dir2_sf_check(args); | 271 | xfs_dir2_sf_check(args); |
262 | out: | 272 | out: |
263 | xfs_trans_log_inode(args->trans, dp, logflags); | 273 | xfs_trans_log_inode(args->trans, dp, logflags); |
264 | kmem_free(hdr); | 274 | kmem_free(dst); |
265 | return error; | 275 | return error; |
266 | } | 276 | } |
267 | 277 | ||
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 6b1e695caf0e..7aeb4c895b32 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -469,16 +469,17 @@ xfs_qm_dqtobp( | |||
469 | struct xfs_mount *mp = dqp->q_mount; | 469 | struct xfs_mount *mp = dqp->q_mount; |
470 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); | 470 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
471 | struct xfs_trans *tp = (tpp ? *tpp : NULL); | 471 | struct xfs_trans *tp = (tpp ? *tpp : NULL); |
472 | uint lock_mode; | ||
472 | 473 | ||
473 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; | 474 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
474 | 475 | ||
475 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | 476 | lock_mode = xfs_ilock_data_map_shared(quotip); |
476 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { | 477 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { |
477 | /* | 478 | /* |
478 | * Return if this type of quotas is turned off while we | 479 | * Return if this type of quotas is turned off while we |
479 | * didn't have the quota inode lock. | 480 | * didn't have the quota inode lock. |
480 | */ | 481 | */ |
481 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 482 | xfs_iunlock(quotip, lock_mode); |
482 | return ESRCH; | 483 | return ESRCH; |
483 | } | 484 | } |
484 | 485 | ||
@@ -488,7 +489,7 @@ xfs_qm_dqtobp( | |||
488 | error = xfs_bmapi_read(quotip, dqp->q_fileoffset, | 489 | error = xfs_bmapi_read(quotip, dqp->q_fileoffset, |
489 | XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); | 490 | XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); |
490 | 491 | ||
491 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 492 | xfs_iunlock(quotip, lock_mode); |
492 | if (error) | 493 | if (error) |
493 | return error; | 494 | return error; |
494 | 495 | ||
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 92e5f62eefc6..f33fbaaa4d8a 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -57,20 +57,24 @@ xfs_qm_dquot_logitem_size( | |||
57 | STATIC void | 57 | STATIC void |
58 | xfs_qm_dquot_logitem_format( | 58 | xfs_qm_dquot_logitem_format( |
59 | struct xfs_log_item *lip, | 59 | struct xfs_log_item *lip, |
60 | struct xfs_log_iovec *logvec) | 60 | struct xfs_log_vec *lv) |
61 | { | 61 | { |
62 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); | 62 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); |
63 | 63 | struct xfs_log_iovec *vecp = NULL; | |
64 | logvec->i_addr = &qlip->qli_format; | 64 | struct xfs_dq_logformat *qlf; |
65 | logvec->i_len = sizeof(xfs_dq_logformat_t); | 65 | |
66 | logvec->i_type = XLOG_REG_TYPE_QFORMAT; | 66 | qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); |
67 | logvec++; | 67 | qlf->qlf_type = XFS_LI_DQUOT; |
68 | logvec->i_addr = &qlip->qli_dquot->q_core; | 68 | qlf->qlf_size = 2; |
69 | logvec->i_len = sizeof(xfs_disk_dquot_t); | 69 | qlf->qlf_id = be32_to_cpu(qlip->qli_dquot->q_core.d_id); |
70 | logvec->i_type = XLOG_REG_TYPE_DQUOT; | 70 | qlf->qlf_blkno = qlip->qli_dquot->q_blkno; |
71 | 71 | qlf->qlf_len = 1; | |
72 | qlip->qli_format.qlf_size = 2; | 72 | qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; |
73 | 73 | xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); | |
74 | |||
75 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, | ||
76 | &qlip->qli_dquot->q_core, | ||
77 | sizeof(struct xfs_disk_dquot)); | ||
74 | } | 78 | } |
75 | 79 | ||
76 | /* | 80 | /* |
@@ -257,18 +261,6 @@ xfs_qm_dquot_logitem_init( | |||
257 | xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, | 261 | xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, |
258 | &xfs_dquot_item_ops); | 262 | &xfs_dquot_item_ops); |
259 | lp->qli_dquot = dqp; | 263 | lp->qli_dquot = dqp; |
260 | lp->qli_format.qlf_type = XFS_LI_DQUOT; | ||
261 | lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); | ||
262 | lp->qli_format.qlf_blkno = dqp->q_blkno; | ||
263 | lp->qli_format.qlf_len = 1; | ||
264 | /* | ||
265 | * This is just the offset of this dquot within its buffer | ||
266 | * (which is currently 1 FSB and probably won't change). | ||
267 | * Hence 32 bits for this offset should be just fine. | ||
268 | * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) | ||
269 | * here, and recompute it at recovery time. | ||
270 | */ | ||
271 | lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; | ||
272 | } | 264 | } |
273 | 265 | ||
274 | /*------------------ QUOTAOFF LOG ITEMS -------------------*/ | 266 | /*------------------ QUOTAOFF LOG ITEMS -------------------*/ |
@@ -294,26 +286,20 @@ xfs_qm_qoff_logitem_size( | |||
294 | *nbytes += sizeof(struct xfs_qoff_logitem); | 286 | *nbytes += sizeof(struct xfs_qoff_logitem); |
295 | } | 287 | } |
296 | 288 | ||
297 | /* | ||
298 | * This is called to fill in the vector of log iovecs for the | ||
299 | * given quotaoff log item. We use only 1 iovec, and we point that | ||
300 | * at the quotaoff_log_format structure embedded in the quotaoff item. | ||
301 | * It is at this point that we assert that all of the extent | ||
302 | * slots in the quotaoff item have been filled. | ||
303 | */ | ||
304 | STATIC void | 289 | STATIC void |
305 | xfs_qm_qoff_logitem_format( | 290 | xfs_qm_qoff_logitem_format( |
306 | struct xfs_log_item *lip, | 291 | struct xfs_log_item *lip, |
307 | struct xfs_log_iovec *log_vector) | 292 | struct xfs_log_vec *lv) |
308 | { | 293 | { |
309 | struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); | 294 | struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); |
310 | 295 | struct xfs_log_iovec *vecp = NULL; | |
311 | ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); | 296 | struct xfs_qoff_logformat *qlf; |
312 | 297 | ||
313 | log_vector->i_addr = &qflip->qql_format; | 298 | qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF); |
314 | log_vector->i_len = sizeof(xfs_qoff_logitem_t); | 299 | qlf->qf_type = XFS_LI_QUOTAOFF; |
315 | log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; | 300 | qlf->qf_size = 1; |
316 | qflip->qql_format.qf_size = 1; | 301 | qlf->qf_flags = qflip->qql_flags; |
302 | xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem)); | ||
317 | } | 303 | } |
318 | 304 | ||
319 | /* | 305 | /* |
@@ -453,8 +439,7 @@ xfs_qm_qoff_logitem_init( | |||
453 | xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? | 439 | xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? |
454 | &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); | 440 | &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); |
455 | qf->qql_item.li_mountp = mp; | 441 | qf->qql_item.li_mountp = mp; |
456 | qf->qql_format.qf_type = XFS_LI_QUOTAOFF; | ||
457 | qf->qql_format.qf_flags = flags; | ||
458 | qf->qql_start_lip = start; | 442 | qf->qql_start_lip = start; |
443 | qf->qql_flags = flags; | ||
459 | return qf; | 444 | return qf; |
460 | } | 445 | } |
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 5acae2ada70b..502e9464634a 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h | |||
@@ -27,13 +27,12 @@ typedef struct xfs_dq_logitem { | |||
27 | xfs_log_item_t qli_item; /* common portion */ | 27 | xfs_log_item_t qli_item; /* common portion */ |
28 | struct xfs_dquot *qli_dquot; /* dquot ptr */ | 28 | struct xfs_dquot *qli_dquot; /* dquot ptr */ |
29 | xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ | 29 | xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ |
30 | xfs_dq_logformat_t qli_format; /* logged structure */ | ||
31 | } xfs_dq_logitem_t; | 30 | } xfs_dq_logitem_t; |
32 | 31 | ||
33 | typedef struct xfs_qoff_logitem { | 32 | typedef struct xfs_qoff_logitem { |
34 | xfs_log_item_t qql_item; /* common portion */ | 33 | xfs_log_item_t qql_item; /* common portion */ |
35 | struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ | 34 | struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ |
36 | xfs_qoff_logformat_t qql_format; /* logged structure */ | 35 | unsigned int qql_flags; |
37 | } xfs_qoff_logitem_t; | 36 | } xfs_qoff_logitem_t; |
38 | 37 | ||
39 | 38 | ||
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3680d04f973f..fb7a4c1ce1c5 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "xfs_trans_priv.h" | 26 | #include "xfs_trans_priv.h" |
27 | #include "xfs_buf_item.h" | 27 | #include "xfs_buf_item.h" |
28 | #include "xfs_extfree_item.h" | 28 | #include "xfs_extfree_item.h" |
29 | #include "xfs_log.h" | ||
29 | 30 | ||
30 | 31 | ||
31 | kmem_zone_t *xfs_efi_zone; | 32 | kmem_zone_t *xfs_efi_zone; |
@@ -101,9 +102,10 @@ xfs_efi_item_size( | |||
101 | STATIC void | 102 | STATIC void |
102 | xfs_efi_item_format( | 103 | xfs_efi_item_format( |
103 | struct xfs_log_item *lip, | 104 | struct xfs_log_item *lip, |
104 | struct xfs_log_iovec *log_vector) | 105 | struct xfs_log_vec *lv) |
105 | { | 106 | { |
106 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); | 107 | struct xfs_efi_log_item *efip = EFI_ITEM(lip); |
108 | struct xfs_log_iovec *vecp = NULL; | ||
107 | 109 | ||
108 | ASSERT(atomic_read(&efip->efi_next_extent) == | 110 | ASSERT(atomic_read(&efip->efi_next_extent) == |
109 | efip->efi_format.efi_nextents); | 111 | efip->efi_format.efi_nextents); |
@@ -111,10 +113,9 @@ xfs_efi_item_format( | |||
111 | efip->efi_format.efi_type = XFS_LI_EFI; | 113 | efip->efi_format.efi_type = XFS_LI_EFI; |
112 | efip->efi_format.efi_size = 1; | 114 | efip->efi_format.efi_size = 1; |
113 | 115 | ||
114 | log_vector->i_addr = &efip->efi_format; | 116 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, |
115 | log_vector->i_len = xfs_efi_item_sizeof(efip); | 117 | &efip->efi_format, |
116 | log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; | 118 | xfs_efi_item_sizeof(efip)); |
117 | ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t)); | ||
118 | } | 119 | } |
119 | 120 | ||
120 | 121 | ||
@@ -368,19 +369,19 @@ xfs_efd_item_size( | |||
368 | STATIC void | 369 | STATIC void |
369 | xfs_efd_item_format( | 370 | xfs_efd_item_format( |
370 | struct xfs_log_item *lip, | 371 | struct xfs_log_item *lip, |
371 | struct xfs_log_iovec *log_vector) | 372 | struct xfs_log_vec *lv) |
372 | { | 373 | { |
373 | struct xfs_efd_log_item *efdp = EFD_ITEM(lip); | 374 | struct xfs_efd_log_item *efdp = EFD_ITEM(lip); |
375 | struct xfs_log_iovec *vecp = NULL; | ||
374 | 376 | ||
375 | ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); | 377 | ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); |
376 | 378 | ||
377 | efdp->efd_format.efd_type = XFS_LI_EFD; | 379 | efdp->efd_format.efd_type = XFS_LI_EFD; |
378 | efdp->efd_format.efd_size = 1; | 380 | efdp->efd_format.efd_size = 1; |
379 | 381 | ||
380 | log_vector->i_addr = &efdp->efd_format; | 382 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, |
381 | log_vector->i_len = xfs_efd_item_sizeof(efdp); | 383 | &efdp->efd_format, |
382 | log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; | 384 | xfs_efd_item_sizeof(efdp)); |
383 | ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t)); | ||
384 | } | 385 | } |
385 | 386 | ||
386 | /* | 387 | /* |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52c91e143725..e00121592632 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -912,7 +912,7 @@ xfs_dir_open( | |||
912 | * If there are any blocks, read-ahead block 0 as we're almost | 912 | * If there are any blocks, read-ahead block 0 as we're almost |
913 | * certain to have the next operation be a read there. | 913 | * certain to have the next operation be a read there. |
914 | */ | 914 | */ |
915 | mode = xfs_ilock_map_shared(ip); | 915 | mode = xfs_ilock_data_map_shared(ip); |
916 | if (ip->i_d.di_nextents > 0) | 916 | if (ip->i_d.di_nextents > 0) |
917 | xfs_dir3_data_readahead(NULL, ip, 0, -1); | 917 | xfs_dir3_data_readahead(NULL, ip, 0, -1); |
918 | xfs_iunlock(ip, mode); | 918 | xfs_iunlock(ip, mode); |
@@ -1215,7 +1215,7 @@ xfs_seek_data( | |||
1215 | uint lock; | 1215 | uint lock; |
1216 | int error; | 1216 | int error; |
1217 | 1217 | ||
1218 | lock = xfs_ilock_map_shared(ip); | 1218 | lock = xfs_ilock_data_map_shared(ip); |
1219 | 1219 | ||
1220 | isize = i_size_read(inode); | 1220 | isize = i_size_read(inode); |
1221 | if (start >= isize) { | 1221 | if (start >= isize) { |
@@ -1294,7 +1294,7 @@ out: | |||
1294 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); | 1294 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
1295 | 1295 | ||
1296 | out_unlock: | 1296 | out_unlock: |
1297 | xfs_iunlock_map_shared(ip, lock); | 1297 | xfs_iunlock(ip, lock); |
1298 | 1298 | ||
1299 | if (error) | 1299 | if (error) |
1300 | return -error; | 1300 | return -error; |
@@ -1319,7 +1319,7 @@ xfs_seek_hole( | |||
1319 | if (XFS_FORCED_SHUTDOWN(mp)) | 1319 | if (XFS_FORCED_SHUTDOWN(mp)) |
1320 | return -XFS_ERROR(EIO); | 1320 | return -XFS_ERROR(EIO); |
1321 | 1321 | ||
1322 | lock = xfs_ilock_map_shared(ip); | 1322 | lock = xfs_ilock_data_map_shared(ip); |
1323 | 1323 | ||
1324 | isize = i_size_read(inode); | 1324 | isize = i_size_read(inode); |
1325 | if (start >= isize) { | 1325 | if (start >= isize) { |
@@ -1402,7 +1402,7 @@ out: | |||
1402 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); | 1402 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
1403 | 1403 | ||
1404 | out_unlock: | 1404 | out_unlock: |
1405 | xfs_iunlock_map_shared(ip, lock); | 1405 | xfs_iunlock(ip, lock); |
1406 | 1406 | ||
1407 | if (error) | 1407 | if (error) |
1408 | return -error; | 1408 | return -error; |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index e87719c5bebe..5d7f105a1c82 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -52,7 +52,7 @@ xfs_ialloc_cluster_alignment( | |||
52 | { | 52 | { |
53 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | 53 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && |
54 | args->mp->m_sb.sb_inoalignmt >= | 54 | args->mp->m_sb.sb_inoalignmt >= |
55 | XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) | 55 | XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) |
56 | return args->mp->m_sb.sb_inoalignmt; | 56 | return args->mp->m_sb.sb_inoalignmt; |
57 | return 1; | 57 | return 1; |
58 | } | 58 | } |
@@ -170,27 +170,20 @@ xfs_ialloc_inode_init( | |||
170 | { | 170 | { |
171 | struct xfs_buf *fbuf; | 171 | struct xfs_buf *fbuf; |
172 | struct xfs_dinode *free; | 172 | struct xfs_dinode *free; |
173 | int blks_per_cluster, nbufs, ninodes; | 173 | int nbufs, blks_per_cluster, inodes_per_cluster; |
174 | int version; | 174 | int version; |
175 | int i, j; | 175 | int i, j; |
176 | xfs_daddr_t d; | 176 | xfs_daddr_t d; |
177 | xfs_ino_t ino = 0; | 177 | xfs_ino_t ino = 0; |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Loop over the new block(s), filling in the inodes. | 180 | * Loop over the new block(s), filling in the inodes. For small block |
181 | * For small block sizes, manipulate the inodes in buffers | 181 | * sizes, manipulate the inodes in buffers which are multiples of the |
182 | * which are multiples of the blocks size. | 182 | * blocks size. |
183 | */ | 183 | */ |
184 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { | 184 | blks_per_cluster = xfs_icluster_size_fsb(mp); |
185 | blks_per_cluster = 1; | 185 | inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; |
186 | nbufs = length; | 186 | nbufs = length / blks_per_cluster; |
187 | ninodes = mp->m_sb.sb_inopblock; | ||
188 | } else { | ||
189 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / | ||
190 | mp->m_sb.sb_blocksize; | ||
191 | nbufs = length / blks_per_cluster; | ||
192 | ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; | ||
193 | } | ||
194 | 187 | ||
195 | /* | 188 | /* |
196 | * Figure out what version number to use in the inodes we create. If | 189 | * Figure out what version number to use in the inodes we create. If |
@@ -225,7 +218,7 @@ xfs_ialloc_inode_init( | |||
225 | * they track in the AIL as if they were physically logged. | 218 | * they track in the AIL as if they were physically logged. |
226 | */ | 219 | */ |
227 | if (tp) | 220 | if (tp) |
228 | xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), | 221 | xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, |
229 | mp->m_sb.sb_inodesize, length, gen); | 222 | mp->m_sb.sb_inodesize, length, gen); |
230 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) | 223 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) |
231 | version = 2; | 224 | version = 2; |
@@ -246,7 +239,7 @@ xfs_ialloc_inode_init( | |||
246 | /* Initialize the inode buffers and log them appropriately. */ | 239 | /* Initialize the inode buffers and log them appropriately. */ |
247 | fbuf->b_ops = &xfs_inode_buf_ops; | 240 | fbuf->b_ops = &xfs_inode_buf_ops; |
248 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); | 241 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); |
249 | for (i = 0; i < ninodes; i++) { | 242 | for (i = 0; i < inodes_per_cluster; i++) { |
250 | int ioffset = i << mp->m_sb.sb_inodelog; | 243 | int ioffset = i << mp->m_sb.sb_inodelog; |
251 | uint isize = xfs_dinode_size(version); | 244 | uint isize = xfs_dinode_size(version); |
252 | 245 | ||
@@ -329,11 +322,11 @@ xfs_ialloc_ag_alloc( | |||
329 | * Locking will ensure that we don't have two callers in here | 322 | * Locking will ensure that we don't have two callers in here |
330 | * at one time. | 323 | * at one time. |
331 | */ | 324 | */ |
332 | newlen = XFS_IALLOC_INODES(args.mp); | 325 | newlen = args.mp->m_ialloc_inos; |
333 | if (args.mp->m_maxicount && | 326 | if (args.mp->m_maxicount && |
334 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) | 327 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) |
335 | return XFS_ERROR(ENOSPC); | 328 | return XFS_ERROR(ENOSPC); |
336 | args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); | 329 | args.minlen = args.maxlen = args.mp->m_ialloc_blks; |
337 | /* | 330 | /* |
338 | * First try to allocate inodes contiguous with the last-allocated | 331 | * First try to allocate inodes contiguous with the last-allocated |
339 | * chunk of inodes. If the filesystem is striped, this will fill | 332 | * chunk of inodes. If the filesystem is striped, this will fill |
@@ -343,7 +336,7 @@ xfs_ialloc_ag_alloc( | |||
343 | newino = be32_to_cpu(agi->agi_newino); | 336 | newino = be32_to_cpu(agi->agi_newino); |
344 | agno = be32_to_cpu(agi->agi_seqno); | 337 | agno = be32_to_cpu(agi->agi_seqno); |
345 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + | 338 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + |
346 | XFS_IALLOC_BLOCKS(args.mp); | 339 | args.mp->m_ialloc_blks; |
347 | if (likely(newino != NULLAGINO && | 340 | if (likely(newino != NULLAGINO && |
348 | (args.agbno < be32_to_cpu(agi->agi_length)))) { | 341 | (args.agbno < be32_to_cpu(agi->agi_length)))) { |
349 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); | 342 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
@@ -585,7 +578,7 @@ xfs_ialloc_ag_select( | |||
585 | * Is there enough free space for the file plus a block of | 578 | * Is there enough free space for the file plus a block of |
586 | * inodes? (if we need to allocate some)? | 579 | * inodes? (if we need to allocate some)? |
587 | */ | 580 | */ |
588 | ineed = XFS_IALLOC_BLOCKS(mp); | 581 | ineed = mp->m_ialloc_blks; |
589 | longest = pag->pagf_longest; | 582 | longest = pag->pagf_longest; |
590 | if (!longest) | 583 | if (!longest) |
591 | longest = pag->pagf_flcount > 0; | 584 | longest = pag->pagf_flcount > 0; |
@@ -999,7 +992,7 @@ xfs_dialloc( | |||
999 | * inode. | 992 | * inode. |
1000 | */ | 993 | */ |
1001 | if (mp->m_maxicount && | 994 | if (mp->m_maxicount && |
1002 | mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { | 995 | mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { |
1003 | noroom = 1; | 996 | noroom = 1; |
1004 | okalloc = 0; | 997 | okalloc = 0; |
1005 | } | 998 | } |
@@ -1202,7 +1195,7 @@ xfs_difree( | |||
1202 | * When an inode cluster is free, it becomes eligible for removal | 1195 | * When an inode cluster is free, it becomes eligible for removal |
1203 | */ | 1196 | */ |
1204 | if (!(mp->m_flags & XFS_MOUNT_IKEEP) && | 1197 | if (!(mp->m_flags & XFS_MOUNT_IKEEP) && |
1205 | (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { | 1198 | (rec.ir_freecount == mp->m_ialloc_inos)) { |
1206 | 1199 | ||
1207 | *delete = 1; | 1200 | *delete = 1; |
1208 | *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); | 1201 | *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); |
@@ -1212,7 +1205,7 @@ xfs_difree( | |||
1212 | * AGI and Superblock inode counts, and mark the disk space | 1205 | * AGI and Superblock inode counts, and mark the disk space |
1213 | * to be freed when the transaction is committed. | 1206 | * to be freed when the transaction is committed. |
1214 | */ | 1207 | */ |
1215 | ilen = XFS_IALLOC_INODES(mp); | 1208 | ilen = mp->m_ialloc_inos; |
1216 | be32_add_cpu(&agi->agi_count, -ilen); | 1209 | be32_add_cpu(&agi->agi_count, -ilen); |
1217 | be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); | 1210 | be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); |
1218 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); | 1211 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); |
@@ -1228,9 +1221,9 @@ xfs_difree( | |||
1228 | goto error0; | 1221 | goto error0; |
1229 | } | 1222 | } |
1230 | 1223 | ||
1231 | xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, | 1224 | xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, |
1232 | agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), | 1225 | XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), |
1233 | XFS_IALLOC_BLOCKS(mp), flist, mp); | 1226 | mp->m_ialloc_blks, flist, mp); |
1234 | } else { | 1227 | } else { |
1235 | *delete = 0; | 1228 | *delete = 0; |
1236 | 1229 | ||
@@ -1311,7 +1304,7 @@ xfs_imap_lookup( | |||
1311 | 1304 | ||
1312 | /* check that the returned record contains the required inode */ | 1305 | /* check that the returned record contains the required inode */ |
1313 | if (rec.ir_startino > agino || | 1306 | if (rec.ir_startino > agino || |
1314 | rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) | 1307 | rec.ir_startino + mp->m_ialloc_inos <= agino) |
1315 | return EINVAL; | 1308 | return EINVAL; |
1316 | 1309 | ||
1317 | /* for untrusted inodes check it is allocated first */ | 1310 | /* for untrusted inodes check it is allocated first */ |
@@ -1384,7 +1377,7 @@ xfs_imap( | |||
1384 | return XFS_ERROR(EINVAL); | 1377 | return XFS_ERROR(EINVAL); |
1385 | } | 1378 | } |
1386 | 1379 | ||
1387 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; | 1380 | blks_per_cluster = xfs_icluster_size_fsb(mp); |
1388 | 1381 | ||
1389 | /* | 1382 | /* |
1390 | * For bulkstat and handle lookups, we have an untrusted inode number | 1383 | * For bulkstat and handle lookups, we have an untrusted inode number |
@@ -1405,7 +1398,7 @@ xfs_imap( | |||
1405 | * If the inode cluster size is the same as the blocksize or | 1398 | * If the inode cluster size is the same as the blocksize or |
1406 | * smaller we get to the buffer by simple arithmetics. | 1399 | * smaller we get to the buffer by simple arithmetics. |
1407 | */ | 1400 | */ |
1408 | if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { | 1401 | if (blks_per_cluster == 1) { |
1409 | offset = XFS_INO_TO_OFFSET(mp, ino); | 1402 | offset = XFS_INO_TO_OFFSET(mp, ino); |
1410 | ASSERT(offset < mp->m_sb.sb_inopblock); | 1403 | ASSERT(offset < mp->m_sb.sb_inopblock); |
1411 | 1404 | ||
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index a8f76a5ff418..812365d17e67 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -25,17 +25,18 @@ struct xfs_mount; | |||
25 | struct xfs_trans; | 25 | struct xfs_trans; |
26 | struct xfs_btree_cur; | 26 | struct xfs_btree_cur; |
27 | 27 | ||
28 | /* | 28 | /* Move inodes in clusters of this size */ |
29 | * Allocation parameters for inode allocation. | ||
30 | */ | ||
31 | #define XFS_IALLOC_INODES(mp) (mp)->m_ialloc_inos | ||
32 | #define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks | ||
33 | |||
34 | /* | ||
35 | * Move inodes in clusters of this size. | ||
36 | */ | ||
37 | #define XFS_INODE_BIG_CLUSTER_SIZE 8192 | 29 | #define XFS_INODE_BIG_CLUSTER_SIZE 8192 |
38 | #define XFS_INODE_CLUSTER_SIZE(mp) (mp)->m_inode_cluster_size | 30 | |
31 | /* Calculate and return the number of filesystem blocks per inode cluster */ | ||
32 | static inline int | ||
33 | xfs_icluster_size_fsb( | ||
34 | struct xfs_mount *mp) | ||
35 | { | ||
36 | if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) | ||
37 | return 1; | ||
38 | return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; | ||
39 | } | ||
39 | 40 | ||
40 | /* | 41 | /* |
41 | * Make an inode pointer out of the buffer/offset. | 42 | * Make an inode pointer out of the buffer/offset. |
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d2eaccfa73f4..7e4549233251 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | #include "xfs_icreate_item.h" | 30 | #include "xfs_icreate_item.h" |
31 | #include "xfs_log.h" | ||
31 | 32 | ||
32 | kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ | 33 | kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ |
33 | 34 | ||
@@ -58,13 +59,14 @@ xfs_icreate_item_size( | |||
58 | STATIC void | 59 | STATIC void |
59 | xfs_icreate_item_format( | 60 | xfs_icreate_item_format( |
60 | struct xfs_log_item *lip, | 61 | struct xfs_log_item *lip, |
61 | struct xfs_log_iovec *log_vector) | 62 | struct xfs_log_vec *lv) |
62 | { | 63 | { |
63 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | 64 | struct xfs_icreate_item *icp = ICR_ITEM(lip); |
65 | struct xfs_log_iovec *vecp = NULL; | ||
64 | 66 | ||
65 | log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; | 67 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, |
66 | log_vector->i_len = sizeof(struct xfs_icreate_log); | 68 | &icp->ic_format, |
67 | log_vector->i_type = XLOG_REG_TYPE_ICREATE; | 69 | sizeof(struct xfs_icreate_log)); |
68 | } | 70 | } |
69 | 71 | ||
70 | 72 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 001aa893ed59..3a137e9f9a7d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -77,48 +77,44 @@ xfs_get_extsz_hint( | |||
77 | } | 77 | } |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * This is a wrapper routine around the xfs_ilock() routine used to centralize | 80 | * These two are wrapper routines around the xfs_ilock() routine used to |
81 | * some grungy code. It is used in places that wish to lock the inode solely | 81 | * centralize some grungy code. They are used in places that wish to lock the |
82 | * for reading the extents. The reason these places can't just call | 82 | * inode solely for reading the extents. The reason these places can't just |
83 | * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the | 83 | * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to |
84 | * extents from disk for a file in b-tree format. If the inode is in b-tree | 84 | * bringing in of the extents from disk for a file in b-tree format. If the |
85 | * format, then we need to lock the inode exclusively until the extents are read | 85 | * inode is in b-tree format, then we need to lock the inode exclusively until |
86 | * in. Locking it exclusively all the time would limit our parallelism | 86 | * the extents are read in. Locking it exclusively all the time would limit |
87 | * unnecessarily, though. What we do instead is check to see if the extents | 87 | * our parallelism unnecessarily, though. What we do instead is check to see |
88 | * have been read in yet, and only lock the inode exclusively if they have not. | 88 | * if the extents have been read in yet, and only lock the inode exclusively |
89 | * if they have not. | ||
89 | * | 90 | * |
90 | * The function returns a value which should be given to the corresponding | 91 | * The functions return a value which should be given to the corresponding |
91 | * xfs_iunlock_map_shared(). This value is the mode in which the lock was | 92 | * xfs_iunlock() call. |
92 | * actually taken. | ||
93 | */ | 93 | */ |
94 | uint | 94 | uint |
95 | xfs_ilock_map_shared( | 95 | xfs_ilock_data_map_shared( |
96 | xfs_inode_t *ip) | 96 | struct xfs_inode *ip) |
97 | { | 97 | { |
98 | uint lock_mode; | 98 | uint lock_mode = XFS_ILOCK_SHARED; |
99 | 99 | ||
100 | if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && | 100 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && |
101 | ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { | 101 | (ip->i_df.if_flags & XFS_IFEXTENTS) == 0) |
102 | lock_mode = XFS_ILOCK_EXCL; | 102 | lock_mode = XFS_ILOCK_EXCL; |
103 | } else { | ||
104 | lock_mode = XFS_ILOCK_SHARED; | ||
105 | } | ||
106 | |||
107 | xfs_ilock(ip, lock_mode); | 103 | xfs_ilock(ip, lock_mode); |
108 | |||
109 | return lock_mode; | 104 | return lock_mode; |
110 | } | 105 | } |
111 | 106 | ||
112 | /* | 107 | uint |
113 | * This is simply the unlock routine to go with xfs_ilock_map_shared(). | 108 | xfs_ilock_attr_map_shared( |
114 | * All it does is call xfs_iunlock() with the given lock_mode. | 109 | struct xfs_inode *ip) |
115 | */ | ||
116 | void | ||
117 | xfs_iunlock_map_shared( | ||
118 | xfs_inode_t *ip, | ||
119 | unsigned int lock_mode) | ||
120 | { | 110 | { |
121 | xfs_iunlock(ip, lock_mode); | 111 | uint lock_mode = XFS_ILOCK_SHARED; |
112 | |||
113 | if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && | ||
114 | (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) | ||
115 | lock_mode = XFS_ILOCK_EXCL; | ||
116 | xfs_ilock(ip, lock_mode); | ||
117 | return lock_mode; | ||
122 | } | 118 | } |
123 | 119 | ||
124 | /* | 120 | /* |
@@ -588,9 +584,9 @@ xfs_lookup( | |||
588 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 584 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
589 | return XFS_ERROR(EIO); | 585 | return XFS_ERROR(EIO); |
590 | 586 | ||
591 | lock_mode = xfs_ilock_map_shared(dp); | 587 | lock_mode = xfs_ilock_data_map_shared(dp); |
592 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); | 588 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); |
593 | xfs_iunlock_map_shared(dp, lock_mode); | 589 | xfs_iunlock(dp, lock_mode); |
594 | 590 | ||
595 | if (error) | 591 | if (error) |
596 | goto out; | 592 | goto out; |
@@ -2141,8 +2137,8 @@ xfs_ifree_cluster( | |||
2141 | { | 2137 | { |
2142 | xfs_mount_t *mp = free_ip->i_mount; | 2138 | xfs_mount_t *mp = free_ip->i_mount; |
2143 | int blks_per_cluster; | 2139 | int blks_per_cluster; |
2140 | int inodes_per_cluster; | ||
2144 | int nbufs; | 2141 | int nbufs; |
2145 | int ninodes; | ||
2146 | int i, j; | 2142 | int i, j; |
2147 | xfs_daddr_t blkno; | 2143 | xfs_daddr_t blkno; |
2148 | xfs_buf_t *bp; | 2144 | xfs_buf_t *bp; |
@@ -2152,18 +2148,11 @@ xfs_ifree_cluster( | |||
2152 | struct xfs_perag *pag; | 2148 | struct xfs_perag *pag; |
2153 | 2149 | ||
2154 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); | 2150 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); |
2155 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { | 2151 | blks_per_cluster = xfs_icluster_size_fsb(mp); |
2156 | blks_per_cluster = 1; | 2152 | inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; |
2157 | ninodes = mp->m_sb.sb_inopblock; | 2153 | nbufs = mp->m_ialloc_blks / blks_per_cluster; |
2158 | nbufs = XFS_IALLOC_BLOCKS(mp); | ||
2159 | } else { | ||
2160 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / | ||
2161 | mp->m_sb.sb_blocksize; | ||
2162 | ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; | ||
2163 | nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; | ||
2164 | } | ||
2165 | 2154 | ||
2166 | for (j = 0; j < nbufs; j++, inum += ninodes) { | 2155 | for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { |
2167 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), | 2156 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), |
2168 | XFS_INO_TO_AGBNO(mp, inum)); | 2157 | XFS_INO_TO_AGBNO(mp, inum)); |
2169 | 2158 | ||
@@ -2225,7 +2214,7 @@ xfs_ifree_cluster( | |||
2225 | * transaction stale above, which means there is no point in | 2214 | * transaction stale above, which means there is no point in |
2226 | * even trying to lock them. | 2215 | * even trying to lock them. |
2227 | */ | 2216 | */ |
2228 | for (i = 0; i < ninodes; i++) { | 2217 | for (i = 0; i < inodes_per_cluster; i++) { |
2229 | retry: | 2218 | retry: |
2230 | rcu_read_lock(); | 2219 | rcu_read_lock(); |
2231 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2220 | ip = radix_tree_lookup(&pag->pag_ici_root, |
@@ -2906,13 +2895,13 @@ xfs_iflush_cluster( | |||
2906 | 2895 | ||
2907 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 2896 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
2908 | 2897 | ||
2909 | inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; | 2898 | inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; |
2910 | ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); | 2899 | ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); |
2911 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); | 2900 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); |
2912 | if (!ilist) | 2901 | if (!ilist) |
2913 | goto out_put; | 2902 | goto out_put; |
2914 | 2903 | ||
2915 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2904 | mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); |
2916 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2905 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2917 | rcu_read_lock(); | 2906 | rcu_read_lock(); |
2918 | /* really need a gang lookup range call here */ | 2907 | /* really need a gang lookup range call here */ |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9e6efccbae04..65e2350f449c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -337,8 +337,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); | |||
337 | void xfs_iunlock(xfs_inode_t *, uint); | 337 | void xfs_iunlock(xfs_inode_t *, uint); |
338 | void xfs_ilock_demote(xfs_inode_t *, uint); | 338 | void xfs_ilock_demote(xfs_inode_t *, uint); |
339 | int xfs_isilocked(xfs_inode_t *, uint); | 339 | int xfs_isilocked(xfs_inode_t *, uint); |
340 | uint xfs_ilock_map_shared(xfs_inode_t *); | 340 | uint xfs_ilock_data_map_shared(struct xfs_inode *); |
341 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); | 341 | uint xfs_ilock_attr_map_shared(struct xfs_inode *); |
342 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, | 342 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, |
343 | xfs_nlink_t, xfs_dev_t, prid_t, int, | 343 | xfs_nlink_t, xfs_dev_t, prid_t, int, |
344 | struct xfs_buf **, xfs_inode_t **); | 344 | struct xfs_buf **, xfs_inode_t **); |
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index cfee14a83cfe..73514c0486b7 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c | |||
@@ -431,6 +431,8 @@ xfs_iread_extents( | |||
431 | xfs_ifork_t *ifp; | 431 | xfs_ifork_t *ifp; |
432 | xfs_extnum_t nextents; | 432 | xfs_extnum_t nextents; |
433 | 433 | ||
434 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
435 | |||
434 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { | 436 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { |
435 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, | 437 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, |
436 | ip->i_mount); | 438 | ip->i_mount); |
@@ -721,15 +723,16 @@ xfs_idestroy_fork( | |||
721 | } | 723 | } |
722 | 724 | ||
723 | /* | 725 | /* |
724 | * xfs_iextents_copy() | 726 | * Convert in-core extents to on-disk form |
725 | * | 727 | * |
726 | * This is called to copy the REAL extents (as opposed to the delayed | 728 | * For either the data or attr fork in extent format, we need to endian convert |
727 | * allocation extents) from the inode into the given buffer. It | 729 | * the in-core extent as we place them into the on-disk inode. |
728 | * returns the number of bytes copied into the buffer. | ||
729 | * | 730 | * |
730 | * If there are no delayed allocation extents, then we can just | 731 | * In the case of the data fork, the in-core and on-disk fork sizes can be |
731 | * memcpy() the extents into the buffer. Otherwise, we need to | 732 | * different due to delayed allocation extents. We only copy on-disk extents |
732 | * examine each extent in turn and skip those which are delayed. | 733 | * here, so callers must always use the physical fork size to determine the |
734 | * size of the buffer passed to this routine. We will return the size actually | ||
735 | * used. | ||
733 | */ | 736 | */ |
734 | int | 737 | int |
735 | xfs_iextents_copy( | 738 | xfs_iextents_copy( |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c0d391f9a6e..686889b4a1e5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include "xfs_trace.h" | 30 | #include "xfs_trace.h" |
31 | #include "xfs_trans_priv.h" | 31 | #include "xfs_trans_priv.h" |
32 | #include "xfs_dinode.h" | 32 | #include "xfs_dinode.h" |
33 | #include "xfs_log.h" | ||
33 | 34 | ||
34 | 35 | ||
35 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ | 36 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ |
@@ -39,27 +40,14 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) | |||
39 | return container_of(lip, struct xfs_inode_log_item, ili_item); | 40 | return container_of(lip, struct xfs_inode_log_item, ili_item); |
40 | } | 41 | } |
41 | 42 | ||
42 | |||
43 | /* | ||
44 | * This returns the number of iovecs needed to log the given inode item. | ||
45 | * | ||
46 | * We need one iovec for the inode log format structure, one for the | ||
47 | * inode core, and possibly one for the inode data/extents/b-tree root | ||
48 | * and one for the inode attribute data/extents/b-tree root. | ||
49 | */ | ||
50 | STATIC void | 43 | STATIC void |
51 | xfs_inode_item_size( | 44 | xfs_inode_item_data_fork_size( |
52 | struct xfs_log_item *lip, | 45 | struct xfs_inode_log_item *iip, |
53 | int *nvecs, | 46 | int *nvecs, |
54 | int *nbytes) | 47 | int *nbytes) |
55 | { | 48 | { |
56 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
57 | struct xfs_inode *ip = iip->ili_inode; | 49 | struct xfs_inode *ip = iip->ili_inode; |
58 | 50 | ||
59 | *nvecs += 2; | ||
60 | *nbytes += sizeof(struct xfs_inode_log_format) + | ||
61 | xfs_icdinode_size(ip->i_d.di_version); | ||
62 | |||
63 | switch (ip->i_d.di_format) { | 51 | switch (ip->i_d.di_format) { |
64 | case XFS_DINODE_FMT_EXTENTS: | 52 | case XFS_DINODE_FMT_EXTENTS: |
65 | if ((iip->ili_fields & XFS_ILOG_DEXT) && | 53 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
@@ -70,7 +58,6 @@ xfs_inode_item_size( | |||
70 | *nvecs += 1; | 58 | *nvecs += 1; |
71 | } | 59 | } |
72 | break; | 60 | break; |
73 | |||
74 | case XFS_DINODE_FMT_BTREE: | 61 | case XFS_DINODE_FMT_BTREE: |
75 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && | 62 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && |
76 | ip->i_df.if_broot_bytes > 0) { | 63 | ip->i_df.if_broot_bytes > 0) { |
@@ -78,7 +65,6 @@ xfs_inode_item_size( | |||
78 | *nvecs += 1; | 65 | *nvecs += 1; |
79 | } | 66 | } |
80 | break; | 67 | break; |
81 | |||
82 | case XFS_DINODE_FMT_LOCAL: | 68 | case XFS_DINODE_FMT_LOCAL: |
83 | if ((iip->ili_fields & XFS_ILOG_DDATA) && | 69 | if ((iip->ili_fields & XFS_ILOG_DDATA) && |
84 | ip->i_df.if_bytes > 0) { | 70 | ip->i_df.if_bytes > 0) { |
@@ -90,19 +76,20 @@ xfs_inode_item_size( | |||
90 | case XFS_DINODE_FMT_DEV: | 76 | case XFS_DINODE_FMT_DEV: |
91 | case XFS_DINODE_FMT_UUID: | 77 | case XFS_DINODE_FMT_UUID: |
92 | break; | 78 | break; |
93 | |||
94 | default: | 79 | default: |
95 | ASSERT(0); | 80 | ASSERT(0); |
96 | break; | 81 | break; |
97 | } | 82 | } |
83 | } | ||
98 | 84 | ||
99 | if (!XFS_IFORK_Q(ip)) | 85 | STATIC void |
100 | return; | 86 | xfs_inode_item_attr_fork_size( |
101 | 87 | struct xfs_inode_log_item *iip, | |
88 | int *nvecs, | ||
89 | int *nbytes) | ||
90 | { | ||
91 | struct xfs_inode *ip = iip->ili_inode; | ||
102 | 92 | ||
103 | /* | ||
104 | * Log any necessary attribute data. | ||
105 | */ | ||
106 | switch (ip->i_d.di_aformat) { | 93 | switch (ip->i_d.di_aformat) { |
107 | case XFS_DINODE_FMT_EXTENTS: | 94 | case XFS_DINODE_FMT_EXTENTS: |
108 | if ((iip->ili_fields & XFS_ILOG_AEXT) && | 95 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
@@ -113,7 +100,6 @@ xfs_inode_item_size( | |||
113 | *nvecs += 1; | 100 | *nvecs += 1; |
114 | } | 101 | } |
115 | break; | 102 | break; |
116 | |||
117 | case XFS_DINODE_FMT_BTREE: | 103 | case XFS_DINODE_FMT_BTREE: |
118 | if ((iip->ili_fields & XFS_ILOG_ABROOT) && | 104 | if ((iip->ili_fields & XFS_ILOG_ABROOT) && |
119 | ip->i_afp->if_broot_bytes > 0) { | 105 | ip->i_afp->if_broot_bytes > 0) { |
@@ -121,7 +107,6 @@ xfs_inode_item_size( | |||
121 | *nvecs += 1; | 107 | *nvecs += 1; |
122 | } | 108 | } |
123 | break; | 109 | break; |
124 | |||
125 | case XFS_DINODE_FMT_LOCAL: | 110 | case XFS_DINODE_FMT_LOCAL: |
126 | if ((iip->ili_fields & XFS_ILOG_ADATA) && | 111 | if ((iip->ili_fields & XFS_ILOG_ADATA) && |
127 | ip->i_afp->if_bytes > 0) { | 112 | ip->i_afp->if_bytes > 0) { |
@@ -129,7 +114,6 @@ xfs_inode_item_size( | |||
129 | *nvecs += 1; | 114 | *nvecs += 1; |
130 | } | 115 | } |
131 | break; | 116 | break; |
132 | |||
133 | default: | 117 | default: |
134 | ASSERT(0); | 118 | ASSERT(0); |
135 | break; | 119 | break; |
@@ -137,98 +121,67 @@ xfs_inode_item_size( | |||
137 | } | 121 | } |
138 | 122 | ||
139 | /* | 123 | /* |
140 | * xfs_inode_item_format_extents - convert in-core extents to on-disk form | 124 | * This returns the number of iovecs needed to log the given inode item. |
141 | * | ||
142 | * For either the data or attr fork in extent format, we need to endian convert | ||
143 | * the in-core extent as we place them into the on-disk inode. In this case, we | ||
144 | * need to do this conversion before we write the extents into the log. Because | ||
145 | * we don't have the disk inode to write into here, we allocate a buffer and | ||
146 | * format the extents into it via xfs_iextents_copy(). We free the buffer in | ||
147 | * the unlock routine after the copy for the log has been made. | ||
148 | * | 125 | * |
149 | * In the case of the data fork, the in-core and on-disk fork sizes can be | 126 | * We need one iovec for the inode log format structure, one for the |
150 | * different due to delayed allocation extents. We only log on-disk extents | 127 | * inode core, and possibly one for the inode data/extents/b-tree root |
151 | * here, so always use the physical fork size to determine the size of the | 128 | * and one for the inode attribute data/extents/b-tree root. |
152 | * buffer we need to allocate. | ||
153 | */ | 129 | */ |
154 | STATIC void | 130 | STATIC void |
155 | xfs_inode_item_format_extents( | 131 | xfs_inode_item_size( |
156 | struct xfs_inode *ip, | 132 | struct xfs_log_item *lip, |
157 | struct xfs_log_iovec *vecp, | 133 | int *nvecs, |
158 | int whichfork, | 134 | int *nbytes) |
159 | int type) | ||
160 | { | 135 | { |
161 | xfs_bmbt_rec_t *ext_buffer; | 136 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
137 | struct xfs_inode *ip = iip->ili_inode; | ||
162 | 138 | ||
163 | ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); | 139 | *nvecs += 2; |
164 | if (whichfork == XFS_DATA_FORK) | 140 | *nbytes += sizeof(struct xfs_inode_log_format) + |
165 | ip->i_itemp->ili_extents_buf = ext_buffer; | 141 | xfs_icdinode_size(ip->i_d.di_version); |
166 | else | ||
167 | ip->i_itemp->ili_aextents_buf = ext_buffer; | ||
168 | 142 | ||
169 | vecp->i_addr = ext_buffer; | 143 | xfs_inode_item_data_fork_size(iip, nvecs, nbytes); |
170 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); | 144 | if (XFS_IFORK_Q(ip)) |
171 | vecp->i_type = type; | 145 | xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); |
172 | } | 146 | } |
173 | 147 | ||
174 | /* | 148 | /* |
175 | * This is called to fill in the vector of log iovecs for the | 149 | * If this is a v1 format inode, then we need to log it as such. This means |
176 | * given inode log item. It fills the first item with an inode | 150 | * that we have to copy the link count from the new field to the old. We |
177 | * log format structure, the second with the on-disk inode structure, | 151 | * don't have to worry about the new fields, because nothing trusts them as |
178 | * and a possible third and/or fourth with the inode data/extents/b-tree | 152 | * long as the old inode version number is there. |
179 | * root and inode attributes data/extents/b-tree root. | ||
180 | */ | 153 | */ |
181 | STATIC void | 154 | STATIC void |
182 | xfs_inode_item_format( | 155 | xfs_inode_item_format_v1_inode( |
183 | struct xfs_log_item *lip, | 156 | struct xfs_inode *ip) |
184 | struct xfs_log_iovec *vecp) | 157 | { |
158 | if (!xfs_sb_version_hasnlink(&ip->i_mount->m_sb)) { | ||
159 | /* | ||
160 | * Convert it back. | ||
161 | */ | ||
162 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); | ||
163 | ip->i_d.di_onlink = ip->i_d.di_nlink; | ||
164 | } else { | ||
165 | /* | ||
166 | * The superblock version has already been bumped, | ||
167 | * so just make the conversion to the new inode | ||
168 | * format permanent. | ||
169 | */ | ||
170 | ip->i_d.di_version = 2; | ||
171 | ip->i_d.di_onlink = 0; | ||
172 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | STATIC void | ||
177 | xfs_inode_item_format_data_fork( | ||
178 | struct xfs_inode_log_item *iip, | ||
179 | struct xfs_inode_log_format *ilf, | ||
180 | struct xfs_log_vec *lv, | ||
181 | struct xfs_log_iovec **vecp) | ||
185 | { | 182 | { |
186 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
187 | struct xfs_inode *ip = iip->ili_inode; | 183 | struct xfs_inode *ip = iip->ili_inode; |
188 | uint nvecs; | ||
189 | size_t data_bytes; | 184 | size_t data_bytes; |
190 | xfs_mount_t *mp; | ||
191 | |||
192 | vecp->i_addr = &iip->ili_format; | ||
193 | vecp->i_len = sizeof(xfs_inode_log_format_t); | ||
194 | vecp->i_type = XLOG_REG_TYPE_IFORMAT; | ||
195 | vecp++; | ||
196 | nvecs = 1; | ||
197 | |||
198 | vecp->i_addr = &ip->i_d; | ||
199 | vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); | ||
200 | vecp->i_type = XLOG_REG_TYPE_ICORE; | ||
201 | vecp++; | ||
202 | nvecs++; | ||
203 | |||
204 | /* | ||
205 | * If this is really an old format inode, then we need to | ||
206 | * log it as such. This means that we have to copy the link | ||
207 | * count from the new field to the old. We don't have to worry | ||
208 | * about the new fields, because nothing trusts them as long as | ||
209 | * the old inode version number is there. If the superblock already | ||
210 | * has a new version number, then we don't bother converting back. | ||
211 | */ | ||
212 | mp = ip->i_mount; | ||
213 | ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); | ||
214 | if (ip->i_d.di_version == 1) { | ||
215 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { | ||
216 | /* | ||
217 | * Convert it back. | ||
218 | */ | ||
219 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); | ||
220 | ip->i_d.di_onlink = ip->i_d.di_nlink; | ||
221 | } else { | ||
222 | /* | ||
223 | * The superblock version has already been bumped, | ||
224 | * so just make the conversion to the new inode | ||
225 | * format permanent. | ||
226 | */ | ||
227 | ip->i_d.di_version = 2; | ||
228 | ip->i_d.di_onlink = 0; | ||
229 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | ||
230 | } | ||
231 | } | ||
232 | 185 | ||
233 | switch (ip->i_d.di_format) { | 186 | switch (ip->i_d.di_format) { |
234 | case XFS_DINODE_FMT_EXTENTS: | 187 | case XFS_DINODE_FMT_EXTENTS: |
@@ -239,36 +192,23 @@ xfs_inode_item_format( | |||
239 | if ((iip->ili_fields & XFS_ILOG_DEXT) && | 192 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
240 | ip->i_d.di_nextents > 0 && | 193 | ip->i_d.di_nextents > 0 && |
241 | ip->i_df.if_bytes > 0) { | 194 | ip->i_df.if_bytes > 0) { |
195 | struct xfs_bmbt_rec *p; | ||
196 | |||
242 | ASSERT(ip->i_df.if_u1.if_extents != NULL); | 197 | ASSERT(ip->i_df.if_u1.if_extents != NULL); |
243 | ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); | 198 | ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); |
244 | ASSERT(iip->ili_extents_buf == NULL); | 199 | |
245 | 200 | p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); | |
246 | #ifdef XFS_NATIVE_HOST | 201 | data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); |
247 | if (ip->i_d.di_nextents == ip->i_df.if_bytes / | 202 | xlog_finish_iovec(lv, *vecp, data_bytes); |
248 | (uint)sizeof(xfs_bmbt_rec_t)) { | 203 | |
249 | /* | 204 | ASSERT(data_bytes <= ip->i_df.if_bytes); |
250 | * There are no delayed allocation | 205 | |
251 | * extents, so just point to the | 206 | ilf->ilf_dsize = data_bytes; |
252 | * real extents array. | 207 | ilf->ilf_size++; |
253 | */ | ||
254 | vecp->i_addr = ip->i_df.if_u1.if_extents; | ||
255 | vecp->i_len = ip->i_df.if_bytes; | ||
256 | vecp->i_type = XLOG_REG_TYPE_IEXT; | ||
257 | } else | ||
258 | #endif | ||
259 | { | ||
260 | xfs_inode_item_format_extents(ip, vecp, | ||
261 | XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); | ||
262 | } | ||
263 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | ||
264 | iip->ili_format.ilf_dsize = vecp->i_len; | ||
265 | vecp++; | ||
266 | nvecs++; | ||
267 | } else { | 208 | } else { |
268 | iip->ili_fields &= ~XFS_ILOG_DEXT; | 209 | iip->ili_fields &= ~XFS_ILOG_DEXT; |
269 | } | 210 | } |
270 | break; | 211 | break; |
271 | |||
272 | case XFS_DINODE_FMT_BTREE: | 212 | case XFS_DINODE_FMT_BTREE: |
273 | iip->ili_fields &= | 213 | iip->ili_fields &= |
274 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 214 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | |
@@ -277,80 +217,70 @@ xfs_inode_item_format( | |||
277 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && | 217 | if ((iip->ili_fields & XFS_ILOG_DBROOT) && |
278 | ip->i_df.if_broot_bytes > 0) { | 218 | ip->i_df.if_broot_bytes > 0) { |
279 | ASSERT(ip->i_df.if_broot != NULL); | 219 | ASSERT(ip->i_df.if_broot != NULL); |
280 | vecp->i_addr = ip->i_df.if_broot; | 220 | xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, |
281 | vecp->i_len = ip->i_df.if_broot_bytes; | 221 | ip->i_df.if_broot, |
282 | vecp->i_type = XLOG_REG_TYPE_IBROOT; | 222 | ip->i_df.if_broot_bytes); |
283 | vecp++; | 223 | ilf->ilf_dsize = ip->i_df.if_broot_bytes; |
284 | nvecs++; | 224 | ilf->ilf_size++; |
285 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; | ||
286 | } else { | 225 | } else { |
287 | ASSERT(!(iip->ili_fields & | 226 | ASSERT(!(iip->ili_fields & |
288 | XFS_ILOG_DBROOT)); | 227 | XFS_ILOG_DBROOT)); |
289 | iip->ili_fields &= ~XFS_ILOG_DBROOT; | 228 | iip->ili_fields &= ~XFS_ILOG_DBROOT; |
290 | } | 229 | } |
291 | break; | 230 | break; |
292 | |||
293 | case XFS_DINODE_FMT_LOCAL: | 231 | case XFS_DINODE_FMT_LOCAL: |
294 | iip->ili_fields &= | 232 | iip->ili_fields &= |
295 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | | 233 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | |
296 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 234 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
297 | if ((iip->ili_fields & XFS_ILOG_DDATA) && | 235 | if ((iip->ili_fields & XFS_ILOG_DDATA) && |
298 | ip->i_df.if_bytes > 0) { | 236 | ip->i_df.if_bytes > 0) { |
299 | ASSERT(ip->i_df.if_u1.if_data != NULL); | ||
300 | ASSERT(ip->i_d.di_size > 0); | ||
301 | |||
302 | vecp->i_addr = ip->i_df.if_u1.if_data; | ||
303 | /* | 237 | /* |
304 | * Round i_bytes up to a word boundary. | 238 | * Round i_bytes up to a word boundary. |
305 | * The underlying memory is guaranteed to | 239 | * The underlying memory is guaranteed to |
306 | * to be there by xfs_idata_realloc(). | 240 | * to be there by xfs_idata_realloc(). |
307 | */ | 241 | */ |
308 | data_bytes = roundup(ip->i_df.if_bytes, 4); | 242 | data_bytes = roundup(ip->i_df.if_bytes, 4); |
309 | ASSERT((ip->i_df.if_real_bytes == 0) || | 243 | ASSERT(ip->i_df.if_real_bytes == 0 || |
310 | (ip->i_df.if_real_bytes == data_bytes)); | 244 | ip->i_df.if_real_bytes == data_bytes); |
311 | vecp->i_len = (int)data_bytes; | 245 | ASSERT(ip->i_df.if_u1.if_data != NULL); |
312 | vecp->i_type = XLOG_REG_TYPE_ILOCAL; | 246 | ASSERT(ip->i_d.di_size > 0); |
313 | vecp++; | 247 | xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, |
314 | nvecs++; | 248 | ip->i_df.if_u1.if_data, data_bytes); |
315 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; | 249 | ilf->ilf_dsize = (unsigned)data_bytes; |
250 | ilf->ilf_size++; | ||
316 | } else { | 251 | } else { |
317 | iip->ili_fields &= ~XFS_ILOG_DDATA; | 252 | iip->ili_fields &= ~XFS_ILOG_DDATA; |
318 | } | 253 | } |
319 | break; | 254 | break; |
320 | |||
321 | case XFS_DINODE_FMT_DEV: | 255 | case XFS_DINODE_FMT_DEV: |
322 | iip->ili_fields &= | 256 | iip->ili_fields &= |
323 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 257 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
324 | XFS_ILOG_DEXT | XFS_ILOG_UUID); | 258 | XFS_ILOG_DEXT | XFS_ILOG_UUID); |
325 | if (iip->ili_fields & XFS_ILOG_DEV) { | 259 | if (iip->ili_fields & XFS_ILOG_DEV) |
326 | iip->ili_format.ilf_u.ilfu_rdev = | 260 | ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; |
327 | ip->i_df.if_u2.if_rdev; | ||
328 | } | ||
329 | break; | 261 | break; |
330 | |||
331 | case XFS_DINODE_FMT_UUID: | 262 | case XFS_DINODE_FMT_UUID: |
332 | iip->ili_fields &= | 263 | iip->ili_fields &= |
333 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 264 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
334 | XFS_ILOG_DEXT | XFS_ILOG_DEV); | 265 | XFS_ILOG_DEXT | XFS_ILOG_DEV); |
335 | if (iip->ili_fields & XFS_ILOG_UUID) { | 266 | if (iip->ili_fields & XFS_ILOG_UUID) |
336 | iip->ili_format.ilf_u.ilfu_uuid = | 267 | ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; |
337 | ip->i_df.if_u2.if_uuid; | ||
338 | } | ||
339 | break; | 268 | break; |
340 | |||
341 | default: | 269 | default: |
342 | ASSERT(0); | 270 | ASSERT(0); |
343 | break; | 271 | break; |
344 | } | 272 | } |
273 | } | ||
345 | 274 | ||
346 | /* | 275 | STATIC void |
347 | * If there are no attributes associated with the file, then we're done. | 276 | xfs_inode_item_format_attr_fork( |
348 | */ | 277 | struct xfs_inode_log_item *iip, |
349 | if (!XFS_IFORK_Q(ip)) { | 278 | struct xfs_inode_log_format *ilf, |
350 | iip->ili_fields &= | 279 | struct xfs_log_vec *lv, |
351 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); | 280 | struct xfs_log_iovec **vecp) |
352 | goto out; | 281 | { |
353 | } | 282 | struct xfs_inode *ip = iip->ili_inode; |
283 | size_t data_bytes; | ||
354 | 284 | ||
355 | switch (ip->i_d.di_aformat) { | 285 | switch (ip->i_d.di_aformat) { |
356 | case XFS_DINODE_FMT_EXTENTS: | 286 | case XFS_DINODE_FMT_EXTENTS: |
@@ -360,30 +290,22 @@ xfs_inode_item_format( | |||
360 | if ((iip->ili_fields & XFS_ILOG_AEXT) && | 290 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
361 | ip->i_d.di_anextents > 0 && | 291 | ip->i_d.di_anextents > 0 && |
362 | ip->i_afp->if_bytes > 0) { | 292 | ip->i_afp->if_bytes > 0) { |
293 | struct xfs_bmbt_rec *p; | ||
294 | |||
363 | ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == | 295 | ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == |
364 | ip->i_d.di_anextents); | 296 | ip->i_d.di_anextents); |
365 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); | 297 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); |
366 | #ifdef XFS_NATIVE_HOST | 298 | |
367 | /* | 299 | p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); |
368 | * There are not delayed allocation extents | 300 | data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); |
369 | * for attributes, so just point at the array. | 301 | xlog_finish_iovec(lv, *vecp, data_bytes); |
370 | */ | 302 | |
371 | vecp->i_addr = ip->i_afp->if_u1.if_extents; | 303 | ilf->ilf_asize = data_bytes; |
372 | vecp->i_len = ip->i_afp->if_bytes; | 304 | ilf->ilf_size++; |
373 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; | ||
374 | #else | ||
375 | ASSERT(iip->ili_aextents_buf == NULL); | ||
376 | xfs_inode_item_format_extents(ip, vecp, | ||
377 | XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); | ||
378 | #endif | ||
379 | iip->ili_format.ilf_asize = vecp->i_len; | ||
380 | vecp++; | ||
381 | nvecs++; | ||
382 | } else { | 305 | } else { |
383 | iip->ili_fields &= ~XFS_ILOG_AEXT; | 306 | iip->ili_fields &= ~XFS_ILOG_AEXT; |
384 | } | 307 | } |
385 | break; | 308 | break; |
386 | |||
387 | case XFS_DINODE_FMT_BTREE: | 309 | case XFS_DINODE_FMT_BTREE: |
388 | iip->ili_fields &= | 310 | iip->ili_fields &= |
389 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); | 311 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); |
@@ -392,61 +314,89 @@ xfs_inode_item_format( | |||
392 | ip->i_afp->if_broot_bytes > 0) { | 314 | ip->i_afp->if_broot_bytes > 0) { |
393 | ASSERT(ip->i_afp->if_broot != NULL); | 315 | ASSERT(ip->i_afp->if_broot != NULL); |
394 | 316 | ||
395 | vecp->i_addr = ip->i_afp->if_broot; | 317 | xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, |
396 | vecp->i_len = ip->i_afp->if_broot_bytes; | 318 | ip->i_afp->if_broot, |
397 | vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; | 319 | ip->i_afp->if_broot_bytes); |
398 | vecp++; | 320 | ilf->ilf_asize = ip->i_afp->if_broot_bytes; |
399 | nvecs++; | 321 | ilf->ilf_size++; |
400 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; | ||
401 | } else { | 322 | } else { |
402 | iip->ili_fields &= ~XFS_ILOG_ABROOT; | 323 | iip->ili_fields &= ~XFS_ILOG_ABROOT; |
403 | } | 324 | } |
404 | break; | 325 | break; |
405 | |||
406 | case XFS_DINODE_FMT_LOCAL: | 326 | case XFS_DINODE_FMT_LOCAL: |
407 | iip->ili_fields &= | 327 | iip->ili_fields &= |
408 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); | 328 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); |
409 | 329 | ||
410 | if ((iip->ili_fields & XFS_ILOG_ADATA) && | 330 | if ((iip->ili_fields & XFS_ILOG_ADATA) && |
411 | ip->i_afp->if_bytes > 0) { | 331 | ip->i_afp->if_bytes > 0) { |
412 | ASSERT(ip->i_afp->if_u1.if_data != NULL); | ||
413 | |||
414 | vecp->i_addr = ip->i_afp->if_u1.if_data; | ||
415 | /* | 332 | /* |
416 | * Round i_bytes up to a word boundary. | 333 | * Round i_bytes up to a word boundary. |
417 | * The underlying memory is guaranteed to | 334 | * The underlying memory is guaranteed to |
418 | * to be there by xfs_idata_realloc(). | 335 | * to be there by xfs_idata_realloc(). |
419 | */ | 336 | */ |
420 | data_bytes = roundup(ip->i_afp->if_bytes, 4); | 337 | data_bytes = roundup(ip->i_afp->if_bytes, 4); |
421 | ASSERT((ip->i_afp->if_real_bytes == 0) || | 338 | ASSERT(ip->i_afp->if_real_bytes == 0 || |
422 | (ip->i_afp->if_real_bytes == data_bytes)); | 339 | ip->i_afp->if_real_bytes == data_bytes); |
423 | vecp->i_len = (int)data_bytes; | 340 | ASSERT(ip->i_afp->if_u1.if_data != NULL); |
424 | vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; | 341 | xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, |
425 | vecp++; | 342 | ip->i_afp->if_u1.if_data, |
426 | nvecs++; | 343 | data_bytes); |
427 | iip->ili_format.ilf_asize = (unsigned)data_bytes; | 344 | ilf->ilf_asize = (unsigned)data_bytes; |
345 | ilf->ilf_size++; | ||
428 | } else { | 346 | } else { |
429 | iip->ili_fields &= ~XFS_ILOG_ADATA; | 347 | iip->ili_fields &= ~XFS_ILOG_ADATA; |
430 | } | 348 | } |
431 | break; | 349 | break; |
432 | |||
433 | default: | 350 | default: |
434 | ASSERT(0); | 351 | ASSERT(0); |
435 | break; | 352 | break; |
436 | } | 353 | } |
437 | |||
438 | out: | ||
439 | /* | ||
440 | * Now update the log format that goes out to disk from the in-core | ||
441 | * values. We always write the inode core to make the arithmetic | ||
442 | * games in recovery easier, which isn't a big deal as just about any | ||
443 | * transaction would dirty it anyway. | ||
444 | */ | ||
445 | iip->ili_format.ilf_fields = XFS_ILOG_CORE | | ||
446 | (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); | ||
447 | iip->ili_format.ilf_size = nvecs; | ||
448 | } | 354 | } |
449 | 355 | ||
356 | /* | ||
357 | * This is called to fill in the vector of log iovecs for the given inode | ||
358 | * log item. It fills the first item with an inode log format structure, | ||
359 | * the second with the on-disk inode structure, and a possible third and/or | ||
360 | * fourth with the inode data/extents/b-tree root and inode attributes | ||
361 | * data/extents/b-tree root. | ||
362 | */ | ||
363 | STATIC void | ||
364 | xfs_inode_item_format( | ||
365 | struct xfs_log_item *lip, | ||
366 | struct xfs_log_vec *lv) | ||
367 | { | ||
368 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
369 | struct xfs_inode *ip = iip->ili_inode; | ||
370 | struct xfs_inode_log_format *ilf; | ||
371 | struct xfs_log_iovec *vecp = NULL; | ||
372 | |||
373 | ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); | ||
374 | ilf->ilf_type = XFS_LI_INODE; | ||
375 | ilf->ilf_ino = ip->i_ino; | ||
376 | ilf->ilf_blkno = ip->i_imap.im_blkno; | ||
377 | ilf->ilf_len = ip->i_imap.im_len; | ||
378 | ilf->ilf_boffset = ip->i_imap.im_boffset; | ||
379 | ilf->ilf_fields = XFS_ILOG_CORE; | ||
380 | ilf->ilf_size = 2; /* format + core */ | ||
381 | xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); | ||
382 | |||
383 | if (ip->i_d.di_version == 1) | ||
384 | xfs_inode_item_format_v1_inode(ip); | ||
385 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, | ||
386 | &ip->i_d, | ||
387 | xfs_icdinode_size(ip->i_d.di_version)); | ||
388 | |||
389 | xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); | ||
390 | if (XFS_IFORK_Q(ip)) { | ||
391 | xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); | ||
392 | } else { | ||
393 | iip->ili_fields &= | ||
394 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); | ||
395 | } | ||
396 | |||
397 | /* update the format with the exact fields we actually logged */ | ||
398 | ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); | ||
399 | } | ||
450 | 400 | ||
451 | /* | 401 | /* |
452 | * This is called to pin the inode associated with the inode log | 402 | * This is called to pin the inode associated with the inode log |
@@ -563,27 +513,6 @@ xfs_inode_item_unlock( | |||
563 | ASSERT(ip->i_itemp != NULL); | 513 | ASSERT(ip->i_itemp != NULL); |
564 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 514 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
565 | 515 | ||
566 | /* | ||
567 | * If the inode needed a separate buffer with which to log | ||
568 | * its extents, then free it now. | ||
569 | */ | ||
570 | if (iip->ili_extents_buf != NULL) { | ||
571 | ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); | ||
572 | ASSERT(ip->i_d.di_nextents > 0); | ||
573 | ASSERT(iip->ili_fields & XFS_ILOG_DEXT); | ||
574 | ASSERT(ip->i_df.if_bytes > 0); | ||
575 | kmem_free(iip->ili_extents_buf); | ||
576 | iip->ili_extents_buf = NULL; | ||
577 | } | ||
578 | if (iip->ili_aextents_buf != NULL) { | ||
579 | ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); | ||
580 | ASSERT(ip->i_d.di_anextents > 0); | ||
581 | ASSERT(iip->ili_fields & XFS_ILOG_AEXT); | ||
582 | ASSERT(ip->i_afp->if_bytes > 0); | ||
583 | kmem_free(iip->ili_aextents_buf); | ||
584 | iip->ili_aextents_buf = NULL; | ||
585 | } | ||
586 | |||
587 | lock_flags = iip->ili_lock_flags; | 516 | lock_flags = iip->ili_lock_flags; |
588 | iip->ili_lock_flags = 0; | 517 | iip->ili_lock_flags = 0; |
589 | if (lock_flags) | 518 | if (lock_flags) |
@@ -670,11 +599,6 @@ xfs_inode_item_init( | |||
670 | iip->ili_inode = ip; | 599 | iip->ili_inode = ip; |
671 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, | 600 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, |
672 | &xfs_inode_item_ops); | 601 | &xfs_inode_item_ops); |
673 | iip->ili_format.ilf_type = XFS_LI_INODE; | ||
674 | iip->ili_format.ilf_ino = ip->i_ino; | ||
675 | iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; | ||
676 | iip->ili_format.ilf_len = ip->i_imap.im_len; | ||
677 | iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; | ||
678 | } | 602 | } |
679 | 603 | ||
680 | /* | 604 | /* |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index dce4d656768c..488d81254e28 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -34,11 +34,6 @@ typedef struct xfs_inode_log_item { | |||
34 | unsigned short ili_logged; /* flushed logged data */ | 34 | unsigned short ili_logged; /* flushed logged data */ |
35 | unsigned int ili_last_fields; /* fields when flushed */ | 35 | unsigned int ili_last_fields; /* fields when flushed */ |
36 | unsigned int ili_fields; /* fields to be logged */ | 36 | unsigned int ili_fields; /* fields to be logged */ |
37 | struct xfs_bmbt_rec *ili_extents_buf; /* array of logged | ||
38 | data exts */ | ||
39 | struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged | ||
40 | attr exts */ | ||
41 | xfs_inode_log_format_t ili_format; /* logged structure */ | ||
42 | } xfs_inode_log_item_t; | 37 | } xfs_inode_log_item_t; |
43 | 38 | ||
44 | static inline int xfs_inode_clean(xfs_inode_t *ip) | 39 | static inline int xfs_inode_clean(xfs_inode_t *ip) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 33ad9a77791f..518aa56b8f2e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -112,15 +112,11 @@ xfs_find_handle( | |||
112 | memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); | 112 | memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); |
113 | hsize = sizeof(xfs_fsid_t); | 113 | hsize = sizeof(xfs_fsid_t); |
114 | } else { | 114 | } else { |
115 | int lock_mode; | ||
116 | |||
117 | lock_mode = xfs_ilock_map_shared(ip); | ||
118 | handle.ha_fid.fid_len = sizeof(xfs_fid_t) - | 115 | handle.ha_fid.fid_len = sizeof(xfs_fid_t) - |
119 | sizeof(handle.ha_fid.fid_len); | 116 | sizeof(handle.ha_fid.fid_len); |
120 | handle.ha_fid.fid_pad = 0; | 117 | handle.ha_fid.fid_pad = 0; |
121 | handle.ha_fid.fid_gen = ip->i_d.di_gen; | 118 | handle.ha_fid.fid_gen = ip->i_d.di_gen; |
122 | handle.ha_fid.fid_ino = ip->i_ino; | 119 | handle.ha_fid.fid_ino = ip->i_ino; |
123 | xfs_iunlock_map_shared(ip, lock_mode); | ||
124 | 120 | ||
125 | hsize = XFS_HSIZE(handle); | 121 | hsize = XFS_HSIZE(handle); |
126 | } | 122 | } |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e544e963..0ce1d759156e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -459,14 +459,12 @@ xfs_vn_getattr( | |||
459 | 459 | ||
460 | static void | 460 | static void |
461 | xfs_setattr_mode( | 461 | xfs_setattr_mode( |
462 | struct xfs_trans *tp, | ||
463 | struct xfs_inode *ip, | 462 | struct xfs_inode *ip, |
464 | struct iattr *iattr) | 463 | struct iattr *iattr) |
465 | { | 464 | { |
466 | struct inode *inode = VFS_I(ip); | 465 | struct inode *inode = VFS_I(ip); |
467 | umode_t mode = iattr->ia_mode; | 466 | umode_t mode = iattr->ia_mode; |
468 | 467 | ||
469 | ASSERT(tp); | ||
470 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 468 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
471 | 469 | ||
472 | ip->i_d.di_mode &= S_IFMT; | 470 | ip->i_d.di_mode &= S_IFMT; |
@@ -476,6 +474,32 @@ xfs_setattr_mode( | |||
476 | inode->i_mode |= mode & ~S_IFMT; | 474 | inode->i_mode |= mode & ~S_IFMT; |
477 | } | 475 | } |
478 | 476 | ||
477 | static void | ||
478 | xfs_setattr_time( | ||
479 | struct xfs_inode *ip, | ||
480 | struct iattr *iattr) | ||
481 | { | ||
482 | struct inode *inode = VFS_I(ip); | ||
483 | |||
484 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
485 | |||
486 | if (iattr->ia_valid & ATTR_ATIME) { | ||
487 | inode->i_atime = iattr->ia_atime; | ||
488 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; | ||
489 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; | ||
490 | } | ||
491 | if (iattr->ia_valid & ATTR_CTIME) { | ||
492 | inode->i_ctime = iattr->ia_ctime; | ||
493 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; | ||
494 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; | ||
495 | } | ||
496 | if (iattr->ia_valid & ATTR_MTIME) { | ||
497 | inode->i_mtime = iattr->ia_mtime; | ||
498 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; | ||
499 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; | ||
500 | } | ||
501 | } | ||
502 | |||
479 | int | 503 | int |
480 | xfs_setattr_nonsize( | 504 | xfs_setattr_nonsize( |
481 | struct xfs_inode *ip, | 505 | struct xfs_inode *ip, |
@@ -618,7 +642,8 @@ xfs_setattr_nonsize( | |||
618 | } | 642 | } |
619 | if (!gid_eq(igid, gid)) { | 643 | if (!gid_eq(igid, gid)) { |
620 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { | 644 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { |
621 | ASSERT(!XFS_IS_PQUOTA_ON(mp)); | 645 | ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || |
646 | !XFS_IS_PQUOTA_ON(mp)); | ||
622 | ASSERT(mask & ATTR_GID); | 647 | ASSERT(mask & ATTR_GID); |
623 | ASSERT(gdqp); | 648 | ASSERT(gdqp); |
624 | olddquot2 = xfs_qm_vop_chown(tp, ip, | 649 | olddquot2 = xfs_qm_vop_chown(tp, ip, |
@@ -629,30 +654,10 @@ xfs_setattr_nonsize( | |||
629 | } | 654 | } |
630 | } | 655 | } |
631 | 656 | ||
632 | /* | ||
633 | * Change file access modes. | ||
634 | */ | ||
635 | if (mask & ATTR_MODE) | 657 | if (mask & ATTR_MODE) |
636 | xfs_setattr_mode(tp, ip, iattr); | 658 | xfs_setattr_mode(ip, iattr); |
637 | 659 | if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) | |
638 | /* | 660 | xfs_setattr_time(ip, iattr); |
639 | * Change file access or modified times. | ||
640 | */ | ||
641 | if (mask & ATTR_ATIME) { | ||
642 | inode->i_atime = iattr->ia_atime; | ||
643 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; | ||
644 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; | ||
645 | } | ||
646 | if (mask & ATTR_CTIME) { | ||
647 | inode->i_ctime = iattr->ia_ctime; | ||
648 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; | ||
649 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; | ||
650 | } | ||
651 | if (mask & ATTR_MTIME) { | ||
652 | inode->i_mtime = iattr->ia_mtime; | ||
653 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; | ||
654 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; | ||
655 | } | ||
656 | 661 | ||
657 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 662 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
658 | 663 | ||
@@ -867,22 +872,10 @@ xfs_setattr_size( | |||
867 | xfs_inode_clear_eofblocks_tag(ip); | 872 | xfs_inode_clear_eofblocks_tag(ip); |
868 | } | 873 | } |
869 | 874 | ||
870 | /* | ||
871 | * Change file access modes. | ||
872 | */ | ||
873 | if (mask & ATTR_MODE) | 875 | if (mask & ATTR_MODE) |
874 | xfs_setattr_mode(tp, ip, iattr); | 876 | xfs_setattr_mode(ip, iattr); |
875 | 877 | if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) | |
876 | if (mask & ATTR_CTIME) { | 878 | xfs_setattr_time(ip, iattr); |
877 | inode->i_ctime = iattr->ia_ctime; | ||
878 | ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; | ||
879 | ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; | ||
880 | } | ||
881 | if (mask & ATTR_MTIME) { | ||
882 | inode->i_mtime = iattr->ia_mtime; | ||
883 | ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; | ||
884 | ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; | ||
885 | } | ||
886 | 879 | ||
887 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 880 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
888 | 881 | ||
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c237ad15d500..f46338285152 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -209,9 +209,8 @@ xfs_bulkstat( | |||
209 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ | 209 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ |
210 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ | 210 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ |
211 | xfs_ino_t lastino; /* last inode number returned */ | 211 | xfs_ino_t lastino; /* last inode number returned */ |
212 | int nbcluster; /* # of blocks in a cluster */ | 212 | int blks_per_cluster; /* # of blocks per cluster */ |
213 | int nicluster; /* # of inodes in a cluster */ | 213 | int inodes_per_cluster;/* # of inodes per cluster */ |
214 | int nimask; /* mask for inode clusters */ | ||
215 | int nirbuf; /* size of irbuf */ | 214 | int nirbuf; /* size of irbuf */ |
216 | int rval; /* return value error code */ | 215 | int rval; /* return value error code */ |
217 | int tmp; /* result value from btree calls */ | 216 | int tmp; /* result value from btree calls */ |
@@ -243,11 +242,8 @@ xfs_bulkstat( | |||
243 | *done = 0; | 242 | *done = 0; |
244 | fmterror = 0; | 243 | fmterror = 0; |
245 | ubufp = ubuffer; | 244 | ubufp = ubuffer; |
246 | nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ? | 245 | blks_per_cluster = xfs_icluster_size_fsb(mp); |
247 | mp->m_sb.sb_inopblock : | 246 | inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; |
248 | (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); | ||
249 | nimask = ~(nicluster - 1); | ||
250 | nbcluster = nicluster >> mp->m_sb.sb_inopblog; | ||
251 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); | 247 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); |
252 | if (!irbuf) | 248 | if (!irbuf) |
253 | return ENOMEM; | 249 | return ENOMEM; |
@@ -390,12 +386,12 @@ xfs_bulkstat( | |||
390 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); | 386 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); |
391 | for (chunkidx = 0; | 387 | for (chunkidx = 0; |
392 | chunkidx < XFS_INODES_PER_CHUNK; | 388 | chunkidx < XFS_INODES_PER_CHUNK; |
393 | chunkidx += nicluster, | 389 | chunkidx += inodes_per_cluster, |
394 | agbno += nbcluster) { | 390 | agbno += blks_per_cluster) { |
395 | if (xfs_inobt_maskn(chunkidx, nicluster) | 391 | if (xfs_inobt_maskn(chunkidx, |
396 | & ~r.ir_free) | 392 | inodes_per_cluster) & ~r.ir_free) |
397 | xfs_btree_reada_bufs(mp, agno, | 393 | xfs_btree_reada_bufs(mp, agno, |
398 | agbno, nbcluster, | 394 | agbno, blks_per_cluster, |
399 | &xfs_inode_buf_ops); | 395 | &xfs_inode_buf_ops); |
400 | } | 396 | } |
401 | blk_finish_plug(&plug); | 397 | blk_finish_plug(&plug); |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index e148719e0a5d..b0f4ef77fa70 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -30,6 +30,52 @@ struct xfs_log_vec { | |||
30 | 30 | ||
31 | #define XFS_LOG_VEC_ORDERED (-1) | 31 | #define XFS_LOG_VEC_ORDERED (-1) |
32 | 32 | ||
33 | static inline void * | ||
34 | xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, | ||
35 | uint type) | ||
36 | { | ||
37 | struct xfs_log_iovec *vec = *vecp; | ||
38 | |||
39 | if (vec) { | ||
40 | ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); | ||
41 | vec++; | ||
42 | } else { | ||
43 | vec = &lv->lv_iovecp[0]; | ||
44 | } | ||
45 | |||
46 | vec->i_type = type; | ||
47 | vec->i_addr = lv->lv_buf + lv->lv_buf_len; | ||
48 | |||
49 | ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); | ||
50 | |||
51 | *vecp = vec; | ||
52 | return vec->i_addr; | ||
53 | } | ||
54 | |||
55 | static inline void | ||
56 | xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) | ||
57 | { | ||
58 | /* | ||
59 | * We need to make sure the next buffer is naturally aligned for the | ||
60 | * biggest basic data type we put into it. We already accounted for | ||
61 | * this when sizing the buffer. | ||
62 | */ | ||
63 | lv->lv_buf_len += round_up(len, sizeof(uint64_t)); | ||
64 | vec->i_len = len; | ||
65 | } | ||
66 | |||
67 | static inline void * | ||
68 | xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, | ||
69 | uint type, void *data, int len) | ||
70 | { | ||
71 | void *buf; | ||
72 | |||
73 | buf = xlog_prepare_iovec(lv, vecp, type); | ||
74 | memcpy(buf, data, len); | ||
75 | xlog_finish_iovec(lv, *vecp, len); | ||
76 | return buf; | ||
77 | } | ||
78 | |||
33 | /* | 79 | /* |
34 | * Structure used to pass callback function and the function's argument | 80 | * Structure used to pass callback function and the function's argument |
35 | * to the log manager. | 81 | * to the log manager. |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5eb51fc5eb84..cdebd832c3db 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -82,36 +82,6 @@ xlog_cil_init_post_recovery( | |||
82 | log->l_curr_block); | 82 | log->l_curr_block); |
83 | } | 83 | } |
84 | 84 | ||
85 | STATIC int | ||
86 | xlog_cil_lv_item_format( | ||
87 | struct xfs_log_item *lip, | ||
88 | struct xfs_log_vec *lv) | ||
89 | { | ||
90 | int index; | ||
91 | char *ptr; | ||
92 | |||
93 | /* format new vectors into array */ | ||
94 | lip->li_ops->iop_format(lip, lv->lv_iovecp); | ||
95 | |||
96 | /* copy data into existing array */ | ||
97 | ptr = lv->lv_buf; | ||
98 | for (index = 0; index < lv->lv_niovecs; index++) { | ||
99 | struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; | ||
100 | |||
101 | memcpy(ptr, vec->i_addr, vec->i_len); | ||
102 | vec->i_addr = ptr; | ||
103 | ptr += vec->i_len; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * some size calculations for log vectors over-estimate, so the caller | ||
108 | * doesn't know the amount of space actually used by the item. Return | ||
109 | * the byte count to the caller so they can check and store it | ||
110 | * appropriately. | ||
111 | */ | ||
112 | return ptr - lv->lv_buf; | ||
113 | } | ||
114 | |||
115 | /* | 85 | /* |
116 | * Prepare the log item for insertion into the CIL. Calculate the difference in | 86 | * Prepare the log item for insertion into the CIL. Calculate the difference in |
117 | * log space and vectors it will consume, and if it is a new item pin it as | 87 | * log space and vectors it will consume, and if it is a new item pin it as |
@@ -232,6 +202,13 @@ xlog_cil_insert_format_items( | |||
232 | nbytes = 0; | 202 | nbytes = 0; |
233 | } | 203 | } |
234 | 204 | ||
205 | /* | ||
206 | * We 64-bit align the length of each iovec so that the start | ||
207 | * of the next one is naturally aligned. We'll need to | ||
208 | * account for that slack space here. | ||
209 | */ | ||
210 | nbytes += niovecs * sizeof(uint64_t); | ||
211 | |||
235 | /* grab the old item if it exists for reservation accounting */ | 212 | /* grab the old item if it exists for reservation accounting */ |
236 | old_lv = lip->li_lv; | 213 | old_lv = lip->li_lv; |
237 | 214 | ||
@@ -254,34 +231,27 @@ xlog_cil_insert_format_items( | |||
254 | */ | 231 | */ |
255 | *diff_iovecs -= lv->lv_niovecs; | 232 | *diff_iovecs -= lv->lv_niovecs; |
256 | *diff_len -= lv->lv_buf_len; | 233 | *diff_len -= lv->lv_buf_len; |
257 | 234 | } else { | |
258 | /* Ensure the lv is set up according to ->iop_size */ | 235 | /* allocate new data chunk */ |
259 | lv->lv_niovecs = niovecs; | 236 | lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); |
260 | lv->lv_buf = (char *)lv + buf_size - nbytes; | 237 | lv->lv_item = lip; |
261 | 238 | lv->lv_size = buf_size; | |
262 | lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); | 239 | if (ordered) { |
263 | goto insert; | 240 | /* track as an ordered logvec */ |
241 | ASSERT(lip->li_lv == NULL); | ||
242 | lv->lv_buf_len = XFS_LOG_VEC_ORDERED; | ||
243 | goto insert; | ||
244 | } | ||
245 | lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; | ||
264 | } | 246 | } |
265 | 247 | ||
266 | /* allocate new data chunk */ | 248 | /* Ensure the lv is set up according to ->iop_size */ |
267 | lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); | ||
268 | lv->lv_item = lip; | ||
269 | lv->lv_size = buf_size; | ||
270 | lv->lv_niovecs = niovecs; | 249 | lv->lv_niovecs = niovecs; |
271 | if (ordered) { | ||
272 | /* track as an ordered logvec */ | ||
273 | ASSERT(lip->li_lv == NULL); | ||
274 | lv->lv_buf_len = XFS_LOG_VEC_ORDERED; | ||
275 | goto insert; | ||
276 | } | ||
277 | |||
278 | /* The allocated iovec region lies beyond the log vector. */ | ||
279 | lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; | ||
280 | 250 | ||
281 | /* The allocated data region lies beyond the iovec region */ | 251 | /* The allocated data region lies beyond the iovec region */ |
252 | lv->lv_buf_len = 0; | ||
282 | lv->lv_buf = (char *)lv + buf_size - nbytes; | 253 | lv->lv_buf = (char *)lv + buf_size - nbytes; |
283 | 254 | lip->li_ops->iop_format(lip, lv); | |
284 | lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); | ||
285 | insert: | 255 | insert: |
286 | ASSERT(lv->lv_buf_len <= nbytes); | 256 | ASSERT(lv->lv_buf_len <= nbytes); |
287 | xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); | 257 | xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669df40f3..bce53ac81096 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -193,7 +193,10 @@ xlog_bread_noalign( | |||
193 | bp->b_io_length = nbblks; | 193 | bp->b_io_length = nbblks; |
194 | bp->b_error = 0; | 194 | bp->b_error = 0; |
195 | 195 | ||
196 | xfsbdstrat(log->l_mp, bp); | 196 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) |
197 | return XFS_ERROR(EIO); | ||
198 | |||
199 | xfs_buf_iorequest(bp); | ||
197 | error = xfs_buf_iowait(bp); | 200 | error = xfs_buf_iowait(bp); |
198 | if (error) | 201 | if (error) |
199 | xfs_buf_ioerror_alert(bp, __func__); | 202 | xfs_buf_ioerror_alert(bp, __func__); |
@@ -1651,6 +1654,7 @@ xlog_recover_reorder_trans( | |||
1651 | int pass) | 1654 | int pass) |
1652 | { | 1655 | { |
1653 | xlog_recover_item_t *item, *n; | 1656 | xlog_recover_item_t *item, *n; |
1657 | int error = 0; | ||
1654 | LIST_HEAD(sort_list); | 1658 | LIST_HEAD(sort_list); |
1655 | LIST_HEAD(cancel_list); | 1659 | LIST_HEAD(cancel_list); |
1656 | LIST_HEAD(buffer_list); | 1660 | LIST_HEAD(buffer_list); |
@@ -1692,9 +1696,17 @@ xlog_recover_reorder_trans( | |||
1692 | "%s: unrecognized type of log operation", | 1696 | "%s: unrecognized type of log operation", |
1693 | __func__); | 1697 | __func__); |
1694 | ASSERT(0); | 1698 | ASSERT(0); |
1695 | return XFS_ERROR(EIO); | 1699 | /* |
1700 | * return the remaining items back to the transaction | ||
1701 | * item list so they can be freed in caller. | ||
1702 | */ | ||
1703 | if (!list_empty(&sort_list)) | ||
1704 | list_splice_init(&sort_list, &trans->r_itemq); | ||
1705 | error = XFS_ERROR(EIO); | ||
1706 | goto out; | ||
1696 | } | 1707 | } |
1697 | } | 1708 | } |
1709 | out: | ||
1698 | ASSERT(list_empty(&sort_list)); | 1710 | ASSERT(list_empty(&sort_list)); |
1699 | if (!list_empty(&buffer_list)) | 1711 | if (!list_empty(&buffer_list)) |
1700 | list_splice(&buffer_list, &trans->r_itemq); | 1712 | list_splice(&buffer_list, &trans->r_itemq); |
@@ -1704,7 +1716,7 @@ xlog_recover_reorder_trans( | |||
1704 | list_splice_tail(&inode_buffer_list, &trans->r_itemq); | 1716 | list_splice_tail(&inode_buffer_list, &trans->r_itemq); |
1705 | if (!list_empty(&cancel_list)) | 1717 | if (!list_empty(&cancel_list)) |
1706 | list_splice_tail(&cancel_list, &trans->r_itemq); | 1718 | list_splice_tail(&cancel_list, &trans->r_itemq); |
1707 | return 0; | 1719 | return error; |
1708 | } | 1720 | } |
1709 | 1721 | ||
1710 | /* | 1722 | /* |
@@ -2514,19 +2526,19 @@ xlog_recover_buffer_pass2( | |||
2514 | * | 2526 | * |
2515 | * Also make sure that only inode buffers with good sizes stay in | 2527 | * Also make sure that only inode buffers with good sizes stay in |
2516 | * the buffer cache. The kernel moves inodes in buffers of 1 block | 2528 | * the buffer cache. The kernel moves inodes in buffers of 1 block |
2517 | * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode | 2529 | * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode |
2518 | * buffers in the log can be a different size if the log was generated | 2530 | * buffers in the log can be a different size if the log was generated |
2519 | * by an older kernel using unclustered inode buffers or a newer kernel | 2531 | * by an older kernel using unclustered inode buffers or a newer kernel |
2520 | * running with a different inode cluster size. Regardless, if the | 2532 | * running with a different inode cluster size. Regardless, if the |
2521 | * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) | 2533 | * the inode buffer size isn't MAX(blocksize, mp->m_inode_cluster_size) |
2522 | * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep | 2534 | * for *our* value of mp->m_inode_cluster_size, then we need to keep |
2523 | * the buffer out of the buffer cache so that the buffer won't | 2535 | * the buffer out of the buffer cache so that the buffer won't |
2524 | * overlap with future reads of those inodes. | 2536 | * overlap with future reads of those inodes. |
2525 | */ | 2537 | */ |
2526 | if (XFS_DINODE_MAGIC == | 2538 | if (XFS_DINODE_MAGIC == |
2527 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2539 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
2528 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, | 2540 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, |
2529 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2541 | (__uint32_t)log->l_mp->m_inode_cluster_size))) { |
2530 | xfs_buf_stale(bp); | 2542 | xfs_buf_stale(bp); |
2531 | error = xfs_bwrite(bp); | 2543 | error = xfs_bwrite(bp); |
2532 | } else { | 2544 | } else { |
@@ -3199,10 +3211,10 @@ xlog_recover_do_icreate_pass2( | |||
3199 | } | 3211 | } |
3200 | 3212 | ||
3201 | /* existing allocation is fixed value */ | 3213 | /* existing allocation is fixed value */ |
3202 | ASSERT(count == XFS_IALLOC_INODES(mp)); | 3214 | ASSERT(count == mp->m_ialloc_inos); |
3203 | ASSERT(length == XFS_IALLOC_BLOCKS(mp)); | 3215 | ASSERT(length == mp->m_ialloc_blks); |
3204 | if (count != XFS_IALLOC_INODES(mp) || | 3216 | if (count != mp->m_ialloc_inos || |
3205 | length != XFS_IALLOC_BLOCKS(mp)) { | 3217 | length != mp->m_ialloc_blks) { |
3206 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); | 3218 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); |
3207 | return EINVAL; | 3219 | return EINVAL; |
3208 | } | 3220 | } |
@@ -3608,8 +3620,10 @@ xlog_recover_process_data( | |||
3608 | error = XFS_ERROR(EIO); | 3620 | error = XFS_ERROR(EIO); |
3609 | break; | 3621 | break; |
3610 | } | 3622 | } |
3611 | if (error) | 3623 | if (error) { |
3624 | xlog_recover_free_trans(trans); | ||
3612 | return error; | 3625 | return error; |
3626 | } | ||
3613 | } | 3627 | } |
3614 | dp += be32_to_cpu(ohead->oh_len); | 3628 | dp += be32_to_cpu(ohead->oh_len); |
3615 | num_logops--; | 3629 | num_logops--; |
@@ -4397,7 +4411,13 @@ xlog_do_recover( | |||
4397 | XFS_BUF_READ(bp); | 4411 | XFS_BUF_READ(bp); |
4398 | XFS_BUF_UNASYNC(bp); | 4412 | XFS_BUF_UNASYNC(bp); |
4399 | bp->b_ops = &xfs_sb_buf_ops; | 4413 | bp->b_ops = &xfs_sb_buf_ops; |
4400 | xfsbdstrat(log->l_mp, bp); | 4414 | |
4415 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { | ||
4416 | xfs_buf_relse(bp); | ||
4417 | return XFS_ERROR(EIO); | ||
4418 | } | ||
4419 | |||
4420 | xfs_buf_iorequest(bp); | ||
4401 | error = xfs_buf_iowait(bp); | 4421 | error = xfs_buf_iowait(bp); |
4402 | if (error) { | 4422 | if (error) { |
4403 | xfs_buf_ioerror_alert(bp, __func__); | 4423 | xfs_buf_ioerror_alert(bp, __func__); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996cfec6..348e4d2ed6e6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -134,8 +134,6 @@ xfs_qm_dqpurge( | |||
134 | { | 134 | { |
135 | struct xfs_mount *mp = dqp->q_mount; | 135 | struct xfs_mount *mp = dqp->q_mount; |
136 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 136 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
137 | struct xfs_dquot *gdqp = NULL; | ||
138 | struct xfs_dquot *pdqp = NULL; | ||
139 | 137 | ||
140 | xfs_dqlock(dqp); | 138 | xfs_dqlock(dqp); |
141 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { | 139 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { |
@@ -143,21 +141,6 @@ xfs_qm_dqpurge( | |||
143 | return EAGAIN; | 141 | return EAGAIN; |
144 | } | 142 | } |
145 | 143 | ||
146 | /* | ||
147 | * If this quota has a hint attached, prepare for releasing it now. | ||
148 | */ | ||
149 | gdqp = dqp->q_gdquot; | ||
150 | if (gdqp) { | ||
151 | xfs_dqlock(gdqp); | ||
152 | dqp->q_gdquot = NULL; | ||
153 | } | ||
154 | |||
155 | pdqp = dqp->q_pdquot; | ||
156 | if (pdqp) { | ||
157 | xfs_dqlock(pdqp); | ||
158 | dqp->q_pdquot = NULL; | ||
159 | } | ||
160 | |||
161 | dqp->dq_flags |= XFS_DQ_FREEING; | 144 | dqp->dq_flags |= XFS_DQ_FREEING; |
162 | 145 | ||
163 | xfs_dqflock(dqp); | 146 | xfs_dqflock(dqp); |
@@ -206,11 +189,47 @@ xfs_qm_dqpurge( | |||
206 | XFS_STATS_DEC(xs_qm_dquot_unused); | 189 | XFS_STATS_DEC(xs_qm_dquot_unused); |
207 | 190 | ||
208 | xfs_qm_dqdestroy(dqp); | 191 | xfs_qm_dqdestroy(dqp); |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Release the group or project dquot pointers the user dquots maybe carrying | ||
197 | * around as a hint, and proceed to purge the user dquot cache if requested. | ||
198 | */ | ||
199 | STATIC int | ||
200 | xfs_qm_dqpurge_hints( | ||
201 | struct xfs_dquot *dqp, | ||
202 | void *data) | ||
203 | { | ||
204 | struct xfs_dquot *gdqp = NULL; | ||
205 | struct xfs_dquot *pdqp = NULL; | ||
206 | uint flags = *((uint *)data); | ||
207 | |||
208 | xfs_dqlock(dqp); | ||
209 | if (dqp->dq_flags & XFS_DQ_FREEING) { | ||
210 | xfs_dqunlock(dqp); | ||
211 | return EAGAIN; | ||
212 | } | ||
209 | 213 | ||
214 | /* If this quota has a hint attached, prepare for releasing it now */ | ||
215 | gdqp = dqp->q_gdquot; | ||
210 | if (gdqp) | 216 | if (gdqp) |
211 | xfs_qm_dqput(gdqp); | 217 | dqp->q_gdquot = NULL; |
218 | |||
219 | pdqp = dqp->q_pdquot; | ||
212 | if (pdqp) | 220 | if (pdqp) |
213 | xfs_qm_dqput(pdqp); | 221 | dqp->q_pdquot = NULL; |
222 | |||
223 | xfs_dqunlock(dqp); | ||
224 | |||
225 | if (gdqp) | ||
226 | xfs_qm_dqrele(gdqp); | ||
227 | if (pdqp) | ||
228 | xfs_qm_dqrele(pdqp); | ||
229 | |||
230 | if (flags & XFS_QMOPT_UQUOTA) | ||
231 | return xfs_qm_dqpurge(dqp, NULL); | ||
232 | |||
214 | return 0; | 233 | return 0; |
215 | } | 234 | } |
216 | 235 | ||
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all( | |||
222 | struct xfs_mount *mp, | 241 | struct xfs_mount *mp, |
223 | uint flags) | 242 | uint flags) |
224 | { | 243 | { |
225 | if (flags & XFS_QMOPT_UQUOTA) | 244 | /* |
226 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); | 245 | * We have to release group/project dquot hint(s) from the user dquot |
246 | * at first if they are there, otherwise we would run into an infinite | ||
247 | * loop while walking through radix tree to purge other type of dquots | ||
248 | * since their refcount is not zero if the user dquot refers to them | ||
249 | * as hint. | ||
250 | * | ||
251 | * Call the special xfs_qm_dqpurge_hints() will end up go through the | ||
252 | * general xfs_qm_dqpurge() against user dquot cache if requested. | ||
253 | */ | ||
254 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags); | ||
255 | |||
227 | if (flags & XFS_QMOPT_GQUOTA) | 256 | if (flags & XFS_QMOPT_GQUOTA) |
228 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); | 257 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
229 | if (flags & XFS_QMOPT_PQUOTA) | 258 | if (flags & XFS_QMOPT_PQUOTA) |
@@ -1193,16 +1222,18 @@ xfs_qm_dqiterate( | |||
1193 | lblkno = 0; | 1222 | lblkno = 0; |
1194 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); | 1223 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
1195 | do { | 1224 | do { |
1225 | uint lock_mode; | ||
1226 | |||
1196 | nmaps = XFS_DQITER_MAP_SIZE; | 1227 | nmaps = XFS_DQITER_MAP_SIZE; |
1197 | /* | 1228 | /* |
1198 | * We aren't changing the inode itself. Just changing | 1229 | * We aren't changing the inode itself. Just changing |
1199 | * some of its data. No new blocks are added here, and | 1230 | * some of its data. No new blocks are added here, and |
1200 | * the inode is never added to the transaction. | 1231 | * the inode is never added to the transaction. |
1201 | */ | 1232 | */ |
1202 | xfs_ilock(qip, XFS_ILOCK_SHARED); | 1233 | lock_mode = xfs_ilock_data_map_shared(qip); |
1203 | error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, | 1234 | error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, |
1204 | map, &nmaps, 0); | 1235 | map, &nmaps, 0); |
1205 | xfs_iunlock(qip, XFS_ILOCK_SHARED); | 1236 | xfs_iunlock(qip, lock_mode); |
1206 | if (error) | 1237 | if (error) |
1207 | break; | 1238 | break; |
1208 | 1239 | ||
@@ -2082,24 +2113,21 @@ xfs_qm_vop_create_dqattach( | |||
2082 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 2113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2083 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 2114 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
2084 | 2115 | ||
2085 | if (udqp) { | 2116 | if (udqp && XFS_IS_UQUOTA_ON(mp)) { |
2086 | ASSERT(ip->i_udquot == NULL); | 2117 | ASSERT(ip->i_udquot == NULL); |
2087 | ASSERT(XFS_IS_UQUOTA_ON(mp)); | ||
2088 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); | 2118 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); |
2089 | 2119 | ||
2090 | ip->i_udquot = xfs_qm_dqhold(udqp); | 2120 | ip->i_udquot = xfs_qm_dqhold(udqp); |
2091 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); | 2121 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); |
2092 | } | 2122 | } |
2093 | if (gdqp) { | 2123 | if (gdqp && XFS_IS_GQUOTA_ON(mp)) { |
2094 | ASSERT(ip->i_gdquot == NULL); | 2124 | ASSERT(ip->i_gdquot == NULL); |
2095 | ASSERT(XFS_IS_GQUOTA_ON(mp)); | ||
2096 | ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); | 2125 | ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); |
2097 | ip->i_gdquot = xfs_qm_dqhold(gdqp); | 2126 | ip->i_gdquot = xfs_qm_dqhold(gdqp); |
2098 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 2127 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
2099 | } | 2128 | } |
2100 | if (pdqp) { | 2129 | if (pdqp && XFS_IS_PQUOTA_ON(mp)) { |
2101 | ASSERT(ip->i_pdquot == NULL); | 2130 | ASSERT(ip->i_pdquot == NULL); |
2102 | ASSERT(XFS_IS_PQUOTA_ON(mp)); | ||
2103 | ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); | 2131 | ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); |
2104 | 2132 | ||
2105 | ip->i_pdquot = xfs_qm_dqhold(pdqp); | 2133 | ip->i_pdquot = xfs_qm_dqhold(pdqp); |
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index a788b66a5cb1..797fd4636273 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
@@ -20,13 +20,29 @@ | |||
20 | 20 | ||
21 | #include "xfs_dquot_item.h" | 21 | #include "xfs_dquot_item.h" |
22 | #include "xfs_dquot.h" | 22 | #include "xfs_dquot.h" |
23 | #include "xfs_quota_priv.h" | ||
24 | 23 | ||
25 | struct xfs_inode; | 24 | struct xfs_inode; |
26 | 25 | ||
27 | extern struct kmem_zone *xfs_qm_dqtrxzone; | 26 | extern struct kmem_zone *xfs_qm_dqtrxzone; |
28 | 27 | ||
29 | /* | 28 | /* |
29 | * Number of bmaps that we ask from bmapi when doing a quotacheck. | ||
30 | * We make this restriction to keep the memory usage to a minimum. | ||
31 | */ | ||
32 | #define XFS_DQITER_MAP_SIZE 10 | ||
33 | |||
34 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ | ||
35 | !dqp->q_core.d_blk_hardlimit && \ | ||
36 | !dqp->q_core.d_blk_softlimit && \ | ||
37 | !dqp->q_core.d_rtb_hardlimit && \ | ||
38 | !dqp->q_core.d_rtb_softlimit && \ | ||
39 | !dqp->q_core.d_ino_hardlimit && \ | ||
40 | !dqp->q_core.d_ino_softlimit && \ | ||
41 | !dqp->q_core.d_bcount && \ | ||
42 | !dqp->q_core.d_rtbcount && \ | ||
43 | !dqp->q_core.d_icount) | ||
44 | |||
45 | /* | ||
30 | * This defines the unit of allocation of dquots. | 46 | * This defines the unit of allocation of dquots. |
31 | * Currently, it is just one file system block, and a 4K blk contains 30 | 47 | * Currently, it is just one file system block, and a 4K blk contains 30 |
32 | * (136 * 30 = 4080) dquots. It's probably not worth trying to make | 48 | * (136 * 30 = 4080) dquots. It's probably not worth trying to make |
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 437c9198031a..3daf5ea1eb8d 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -278,7 +278,7 @@ xfs_qm_scall_trunc_qfiles( | |||
278 | xfs_mount_t *mp, | 278 | xfs_mount_t *mp, |
279 | uint flags) | 279 | uint flags) |
280 | { | 280 | { |
281 | int error = 0, error2 = 0; | 281 | int error; |
282 | 282 | ||
283 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 283 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
284 | xfs_debug(mp, "%s: flags=%x m_qflags=%x", | 284 | xfs_debug(mp, "%s: flags=%x m_qflags=%x", |
@@ -286,14 +286,20 @@ xfs_qm_scall_trunc_qfiles( | |||
286 | return XFS_ERROR(EINVAL); | 286 | return XFS_ERROR(EINVAL); |
287 | } | 287 | } |
288 | 288 | ||
289 | if (flags & XFS_DQ_USER) | 289 | if (flags & XFS_DQ_USER) { |
290 | error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); | 290 | error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); |
291 | if (flags & XFS_DQ_GROUP) | 291 | if (error) |
292 | error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); | 292 | return error; |
293 | } | ||
294 | if (flags & XFS_DQ_GROUP) { | ||
295 | error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); | ||
296 | if (error) | ||
297 | return error; | ||
298 | } | ||
293 | if (flags & XFS_DQ_PROJ) | 299 | if (flags & XFS_DQ_PROJ) |
294 | error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); | 300 | error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); |
295 | 301 | ||
296 | return error ? error : error2; | 302 | return error; |
297 | } | 303 | } |
298 | 304 | ||
299 | /* | 305 | /* |
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h deleted file mode 100644 index 6d86219d93da..000000000000 --- a/fs/xfs/xfs_quota_priv.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_QUOTA_PRIV_H__ | ||
19 | #define __XFS_QUOTA_PRIV_H__ | ||
20 | |||
21 | /* | ||
22 | * Number of bmaps that we ask from bmapi when doing a quotacheck. | ||
23 | * We make this restriction to keep the memory usage to a minimum. | ||
24 | */ | ||
25 | #define XFS_DQITER_MAP_SIZE 10 | ||
26 | |||
27 | #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ | ||
28 | !dqp->q_core.d_blk_hardlimit && \ | ||
29 | !dqp->q_core.d_blk_softlimit && \ | ||
30 | !dqp->q_core.d_rtb_hardlimit && \ | ||
31 | !dqp->q_core.d_rtb_softlimit && \ | ||
32 | !dqp->q_core.d_ino_hardlimit && \ | ||
33 | !dqp->q_core.d_ino_softlimit && \ | ||
34 | !dqp->q_core.d_bcount && \ | ||
35 | !dqp->q_core.d_rtbcount && \ | ||
36 | !dqp->q_core.d_icount) | ||
37 | |||
38 | #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ | ||
39 | (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ | ||
40 | (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) | ||
41 | |||
42 | #endif /* __XFS_QUOTA_PRIV_H__ */ | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9b96d35e483d..b5bc1ab3c4da 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -64,7 +64,7 @@ typedef struct xfs_log_item { | |||
64 | 64 | ||
65 | struct xfs_item_ops { | 65 | struct xfs_item_ops { |
66 | void (*iop_size)(xfs_log_item_t *, int *, int *); | 66 | void (*iop_size)(xfs_log_item_t *, int *, int *); |
67 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); | 67 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); |
68 | void (*iop_pin)(xfs_log_item_t *); | 68 | void (*iop_pin)(xfs_log_item_t *); |
69 | void (*iop_unpin)(xfs_log_item_t *, int remove); | 69 | void (*iop_unpin)(xfs_log_item_t *, int remove); |
70 | uint (*iop_push)(struct xfs_log_item *, struct list_head *); | 70 | uint (*iop_push)(struct xfs_log_item *, struct list_head *); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c035d11b7734..647b6f1d8923 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map( | |||
314 | ASSERT(bp->b_iodone == NULL); | 314 | ASSERT(bp->b_iodone == NULL); |
315 | XFS_BUF_READ(bp); | 315 | XFS_BUF_READ(bp); |
316 | bp->b_ops = ops; | 316 | bp->b_ops = ops; |
317 | xfsbdstrat(tp->t_mountp, bp); | 317 | |
318 | /* | ||
319 | * XXX(hch): clean up the error handling here to be less | ||
320 | * of a mess.. | ||
321 | */ | ||
322 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
323 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
324 | xfs_bioerror_relse(bp); | ||
325 | } else { | ||
326 | xfs_buf_iorequest(bp); | ||
327 | } | ||
328 | |||
318 | error = xfs_buf_iowait(bp); | 329 | error = xfs_buf_iowait(bp); |
319 | if (error) { | 330 | if (error) { |
320 | xfs_buf_ioerror_alert(bp, __func__); | 331 | xfs_buf_ioerror_alert(bp, __func__); |
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd2a10e15d3a..41172861e857 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -295,8 +295,8 @@ xfs_trans_mod_dquot( | |||
295 | /* | 295 | /* |
296 | * Given an array of dqtrx structures, lock all the dquots associated and join | 296 | * Given an array of dqtrx structures, lock all the dquots associated and join |
297 | * them to the transaction, provided they have been modified. We know that the | 297 | * them to the transaction, provided they have been modified. We know that the |
298 | * highest number of dquots of one type - usr, grp OR prj - involved in a | 298 | * highest number of dquots of one type - usr, grp and prj - involved in a |
299 | * transaction is 2 so we don't need to make this very generic. | 299 | * transaction is 3 so we don't need to make this very generic. |
300 | */ | 300 | */ |
301 | STATIC void | 301 | STATIC void |
302 | xfs_trans_dqlockedjoin( | 302 | xfs_trans_dqlockedjoin( |
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2fd59c0dae66..2ffd3e331b49 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
@@ -174,7 +174,7 @@ xfs_calc_itruncate_reservation( | |||
174 | xfs_calc_buf_res(5, 0) + | 174 | xfs_calc_buf_res(5, 0) + |
175 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 175 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
176 | XFS_FSB_TO_B(mp, 1)) + | 176 | XFS_FSB_TO_B(mp, 1)) + |
177 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + | 177 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + |
178 | mp->m_in_maxlevels, 0))); | 178 | mp->m_in_maxlevels, 0))); |
179 | } | 179 | } |
180 | 180 | ||
@@ -282,7 +282,7 @@ xfs_calc_create_resv_modify( | |||
282 | * For create we can allocate some inodes giving: | 282 | * For create we can allocate some inodes giving: |
283 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 283 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
284 | * the superblock for the nlink flag: sector size | 284 | * the superblock for the nlink flag: sector size |
285 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | 285 | * the inode blocks allocated: mp->m_ialloc_blks * blocksize |
286 | * the inode btree: max depth * blocksize | 286 | * the inode btree: max depth * blocksize |
287 | * the allocation btrees: 2 trees * (max depth - 1) * block size | 287 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
288 | */ | 288 | */ |
@@ -292,7 +292,7 @@ xfs_calc_create_resv_alloc( | |||
292 | { | 292 | { |
293 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 293 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
294 | mp->m_sb.sb_sectsize + | 294 | mp->m_sb.sb_sectsize + |
295 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + | 295 | xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + |
296 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | 296 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + |
297 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 297 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
298 | XFS_FSB_TO_B(mp, 1)); | 298 | XFS_FSB_TO_B(mp, 1)); |
@@ -385,9 +385,9 @@ xfs_calc_ifree_reservation( | |||
385 | xfs_calc_inode_res(mp, 1) + | 385 | xfs_calc_inode_res(mp, 1) + |
386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
388 | max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + | 388 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) + |
389 | xfs_calc_buf_res(1, 0) + | 389 | xfs_calc_buf_res(1, 0) + |
390 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + | 390 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + |
391 | mp->m_in_maxlevels, 0) + | 391 | mp->m_in_maxlevels, 0) + |
392 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 392 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
393 | XFS_FSB_TO_B(mp, 1)); | 393 | XFS_FSB_TO_B(mp, 1)); |
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index 7d2c920dfb9c..af5dbe06cb65 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h | |||
@@ -47,7 +47,7 @@ | |||
47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ | 47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ |
48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) | 48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) |
49 | #define XFS_IALLOC_SPACE_RES(mp) \ | 49 | #define XFS_IALLOC_SPACE_RES(mp) \ |
50 | (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1) | 50 | ((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1) |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Space reservation values for various transactions. | 53 | * Space reservation values for various transactions. |
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h index 3e8e797c6d11..e8a77383c0d5 100644 --- a/fs/xfs/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h | |||
@@ -35,15 +35,6 @@ struct attrlist_cursor_kern; | |||
35 | { IO_INVIS, "INVIS"} | 35 | { IO_INVIS, "INVIS"} |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. | ||
39 | */ | ||
40 | #define FI_NONE 0 /* none */ | ||
41 | #define FI_REMAPF 1 /* Do a remapf prior to the operation */ | ||
42 | #define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. | ||
43 | Prevent VM access to the pages until | ||
44 | the operation completes. */ | ||
45 | |||
46 | /* | ||
47 | * Some useful predicates. | 38 | * Some useful predicates. |
48 | */ | 39 | */ |
49 | #define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) | 40 | #define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) |