diff options
Diffstat (limited to 'fs')
38 files changed, 490 insertions, 309 deletions
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx) | |||
244 | int i; | 244 | int i; |
245 | 245 | ||
246 | for (i = 0; i < ctx->nr_pages; i++) { | 246 | for (i = 0; i < ctx->nr_pages; i++) { |
247 | struct page *page; | ||
247 | pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, | 248 | pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, |
248 | page_count(ctx->ring_pages[i])); | 249 | page_count(ctx->ring_pages[i])); |
249 | put_page(ctx->ring_pages[i]); | 250 | page = ctx->ring_pages[i]; |
251 | if (!page) | ||
252 | continue; | ||
253 | ctx->ring_pages[i] = NULL; | ||
254 | put_page(page); | ||
250 | } | 255 | } |
251 | 256 | ||
252 | put_aio_ring_file(ctx); | 257 | put_aio_ring_file(ctx); |
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, | |||
280 | unsigned long flags; | 285 | unsigned long flags; |
281 | int rc; | 286 | int rc; |
282 | 287 | ||
288 | rc = 0; | ||
289 | |||
290 | /* Make sure the old page hasn't already been changed */ | ||
291 | spin_lock(&mapping->private_lock); | ||
292 | ctx = mapping->private_data; | ||
293 | if (ctx) { | ||
294 | pgoff_t idx; | ||
295 | spin_lock_irqsave(&ctx->completion_lock, flags); | ||
296 | idx = old->index; | ||
297 | if (idx < (pgoff_t)ctx->nr_pages) { | ||
298 | if (ctx->ring_pages[idx] != old) | ||
299 | rc = -EAGAIN; | ||
300 | } else | ||
301 | rc = -EINVAL; | ||
302 | spin_unlock_irqrestore(&ctx->completion_lock, flags); | ||
303 | } else | ||
304 | rc = -EINVAL; | ||
305 | spin_unlock(&mapping->private_lock); | ||
306 | |||
307 | if (rc != 0) | ||
308 | return rc; | ||
309 | |||
283 | /* Writeback must be complete */ | 310 | /* Writeback must be complete */ |
284 | BUG_ON(PageWriteback(old)); | 311 | BUG_ON(PageWriteback(old)); |
285 | put_page(old); | 312 | get_page(new); |
286 | 313 | ||
287 | rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); | 314 | rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1); |
288 | if (rc != MIGRATEPAGE_SUCCESS) { | 315 | if (rc != MIGRATEPAGE_SUCCESS) { |
289 | get_page(old); | 316 | put_page(new); |
290 | return rc; | 317 | return rc; |
291 | } | 318 | } |
292 | 319 | ||
293 | get_page(new); | ||
294 | |||
295 | /* We can potentially race against kioctx teardown here. Use the | 320 | /* We can potentially race against kioctx teardown here. Use the |
296 | * address_space's private data lock to protect the mapping's | 321 | * address_space's private data lock to protect the mapping's |
297 | * private_data. | 322 | * private_data. |
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, | |||
303 | spin_lock_irqsave(&ctx->completion_lock, flags); | 328 | spin_lock_irqsave(&ctx->completion_lock, flags); |
304 | migrate_page_copy(new, old); | 329 | migrate_page_copy(new, old); |
305 | idx = old->index; | 330 | idx = old->index; |
306 | if (idx < (pgoff_t)ctx->nr_pages) | 331 | if (idx < (pgoff_t)ctx->nr_pages) { |
307 | ctx->ring_pages[idx] = new; | 332 | /* And only do the move if things haven't changed */ |
333 | if (ctx->ring_pages[idx] == old) | ||
334 | ctx->ring_pages[idx] = new; | ||
335 | else | ||
336 | rc = -EAGAIN; | ||
337 | } else | ||
338 | rc = -EINVAL; | ||
308 | spin_unlock_irqrestore(&ctx->completion_lock, flags); | 339 | spin_unlock_irqrestore(&ctx->completion_lock, flags); |
309 | } else | 340 | } else |
310 | rc = -EBUSY; | 341 | rc = -EBUSY; |
311 | spin_unlock(&mapping->private_lock); | 342 | spin_unlock(&mapping->private_lock); |
312 | 343 | ||
344 | if (rc == MIGRATEPAGE_SUCCESS) | ||
345 | put_page(old); | ||
346 | else | ||
347 | put_page(new); | ||
348 | |||
313 | return rc; | 349 | return rc; |
314 | } | 350 | } |
315 | #endif | 351 | #endif |
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
326 | struct aio_ring *ring; | 362 | struct aio_ring *ring; |
327 | unsigned nr_events = ctx->max_reqs; | 363 | unsigned nr_events = ctx->max_reqs; |
328 | struct mm_struct *mm = current->mm; | 364 | struct mm_struct *mm = current->mm; |
329 | unsigned long size, populate; | 365 | unsigned long size, unused; |
330 | int nr_pages; | 366 | int nr_pages; |
331 | int i; | 367 | int i; |
332 | struct file *file; | 368 | struct file *file; |
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
347 | return -EAGAIN; | 383 | return -EAGAIN; |
348 | } | 384 | } |
349 | 385 | ||
386 | ctx->aio_ring_file = file; | ||
387 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) | ||
388 | / sizeof(struct io_event); | ||
389 | |||
390 | ctx->ring_pages = ctx->internal_pages; | ||
391 | if (nr_pages > AIO_RING_PAGES) { | ||
392 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | ||
393 | GFP_KERNEL); | ||
394 | if (!ctx->ring_pages) { | ||
395 | put_aio_ring_file(ctx); | ||
396 | return -ENOMEM; | ||
397 | } | ||
398 | } | ||
399 | |||
350 | for (i = 0; i < nr_pages; i++) { | 400 | for (i = 0; i < nr_pages; i++) { |
351 | struct page *page; | 401 | struct page *page; |
352 | page = find_or_create_page(file->f_inode->i_mapping, | 402 | page = find_or_create_page(file->f_inode->i_mapping, |
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
358 | SetPageUptodate(page); | 408 | SetPageUptodate(page); |
359 | SetPageDirty(page); | 409 | SetPageDirty(page); |
360 | unlock_page(page); | 410 | unlock_page(page); |
411 | |||
412 | ctx->ring_pages[i] = page; | ||
361 | } | 413 | } |
362 | ctx->aio_ring_file = file; | 414 | ctx->nr_pages = i; |
363 | nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) | ||
364 | / sizeof(struct io_event); | ||
365 | 415 | ||
366 | ctx->ring_pages = ctx->internal_pages; | 416 | if (unlikely(i != nr_pages)) { |
367 | if (nr_pages > AIO_RING_PAGES) { | 417 | aio_free_ring(ctx); |
368 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | 418 | return -EAGAIN; |
369 | GFP_KERNEL); | ||
370 | if (!ctx->ring_pages) { | ||
371 | put_aio_ring_file(ctx); | ||
372 | return -ENOMEM; | ||
373 | } | ||
374 | } | 419 | } |
375 | 420 | ||
376 | ctx->mmap_size = nr_pages * PAGE_SIZE; | 421 | ctx->mmap_size = nr_pages * PAGE_SIZE; |
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
379 | down_write(&mm->mmap_sem); | 424 | down_write(&mm->mmap_sem); |
380 | ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, | 425 | ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, |
381 | PROT_READ | PROT_WRITE, | 426 | PROT_READ | PROT_WRITE, |
382 | MAP_SHARED | MAP_POPULATE, 0, &populate); | 427 | MAP_SHARED, 0, &unused); |
428 | up_write(&mm->mmap_sem); | ||
383 | if (IS_ERR((void *)ctx->mmap_base)) { | 429 | if (IS_ERR((void *)ctx->mmap_base)) { |
384 | up_write(&mm->mmap_sem); | ||
385 | ctx->mmap_size = 0; | 430 | ctx->mmap_size = 0; |
386 | aio_free_ring(ctx); | 431 | aio_free_ring(ctx); |
387 | return -EAGAIN; | 432 | return -EAGAIN; |
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
389 | 434 | ||
390 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); | 435 | pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); |
391 | 436 | ||
392 | /* We must do this while still holding mmap_sem for write, as we | ||
393 | * need to be protected against userspace attempting to mremap() | ||
394 | * or munmap() the ring buffer. | ||
395 | */ | ||
396 | ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, | ||
397 | 1, 0, ctx->ring_pages, NULL); | ||
398 | |||
399 | /* Dropping the reference here is safe as the page cache will hold | ||
400 | * onto the pages for us. It is also required so that page migration | ||
401 | * can unmap the pages and get the right reference count. | ||
402 | */ | ||
403 | for (i = 0; i < ctx->nr_pages; i++) | ||
404 | put_page(ctx->ring_pages[i]); | ||
405 | |||
406 | up_write(&mm->mmap_sem); | ||
407 | |||
408 | if (unlikely(ctx->nr_pages != nr_pages)) { | ||
409 | aio_free_ring(ctx); | ||
410 | return -EAGAIN; | ||
411 | } | ||
412 | |||
413 | ctx->user_id = ctx->mmap_base; | 437 | ctx->user_id = ctx->mmap_base; |
414 | ctx->nr_events = nr_events; /* trusted copy */ | 438 | ctx->nr_events = nr_events; /* trusted copy */ |
415 | 439 | ||
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
652 | aio_nr += ctx->max_reqs; | 676 | aio_nr += ctx->max_reqs; |
653 | spin_unlock(&aio_nr_lock); | 677 | spin_unlock(&aio_nr_lock); |
654 | 678 | ||
655 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ | 679 | percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ |
680 | percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */ | ||
656 | 681 | ||
657 | err = ioctx_add_table(ctx, mm); | 682 | err = ioctx_add_table(ctx, mm); |
658 | if (err) | 683 | if (err) |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e561c059539..ec3ba43b9faa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -210,9 +210,13 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
210 | if (err < 0) { | 210 | if (err < 0) { |
211 | SetPageError(page); | 211 | SetPageError(page); |
212 | goto out; | 212 | goto out; |
213 | } else if (err < PAGE_CACHE_SIZE) { | 213 | } else { |
214 | if (err < PAGE_CACHE_SIZE) { | ||
214 | /* zero fill remainder of page */ | 215 | /* zero fill remainder of page */ |
215 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | 216 | zero_user_segment(page, err, PAGE_CACHE_SIZE); |
217 | } else { | ||
218 | flush_dcache_page(page); | ||
219 | } | ||
216 | } | 220 | } |
217 | SetPageUptodate(page); | 221 | SetPageUptodate(page); |
218 | 222 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9a8e396aed89..278fd2891288 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -978,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
978 | struct ceph_mds_reply_inode *ininfo; | 978 | struct ceph_mds_reply_inode *ininfo; |
979 | struct ceph_vino vino; | 979 | struct ceph_vino vino; |
980 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | 980 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
981 | int i = 0; | ||
982 | int err = 0; | 981 | int err = 0; |
983 | 982 | ||
984 | dout("fill_trace %p is_dentry %d is_target %d\n", req, | 983 | dout("fill_trace %p is_dentry %d is_target %d\n", req, |
@@ -1039,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1039 | } | 1038 | } |
1040 | } | 1039 | } |
1041 | 1040 | ||
1041 | if (rinfo->head->is_target) { | ||
1042 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1043 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1044 | |||
1045 | in = ceph_get_inode(sb, vino); | ||
1046 | if (IS_ERR(in)) { | ||
1047 | err = PTR_ERR(in); | ||
1048 | goto done; | ||
1049 | } | ||
1050 | req->r_target_inode = in; | ||
1051 | |||
1052 | err = fill_inode(in, &rinfo->targeti, NULL, | ||
1053 | session, req->r_request_started, | ||
1054 | (le32_to_cpu(rinfo->head->result) == 0) ? | ||
1055 | req->r_fmode : -1, | ||
1056 | &req->r_caps_reservation); | ||
1057 | if (err < 0) { | ||
1058 | pr_err("fill_inode badness %p %llx.%llx\n", | ||
1059 | in, ceph_vinop(in)); | ||
1060 | goto done; | ||
1061 | } | ||
1062 | } | ||
1063 | |||
1042 | /* | 1064 | /* |
1043 | * ignore null lease/binding on snapdir ENOENT, or else we | 1065 | * ignore null lease/binding on snapdir ENOENT, or else we |
1044 | * will have trouble splicing in the virtual snapdir later | 1066 | * will have trouble splicing in the virtual snapdir later |
@@ -1108,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1108 | ceph_dentry(req->r_old_dentry)->offset); | 1130 | ceph_dentry(req->r_old_dentry)->offset); |
1109 | 1131 | ||
1110 | dn = req->r_old_dentry; /* use old_dentry */ | 1132 | dn = req->r_old_dentry; /* use old_dentry */ |
1111 | in = dn->d_inode; | ||
1112 | } | 1133 | } |
1113 | 1134 | ||
1114 | /* null dentry? */ | 1135 | /* null dentry? */ |
@@ -1130,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1130 | } | 1151 | } |
1131 | 1152 | ||
1132 | /* attach proper inode */ | 1153 | /* attach proper inode */ |
1133 | ininfo = rinfo->targeti.in; | 1154 | if (!dn->d_inode) { |
1134 | vino.ino = le64_to_cpu(ininfo->ino); | 1155 | ihold(in); |
1135 | vino.snap = le64_to_cpu(ininfo->snapid); | ||
1136 | in = dn->d_inode; | ||
1137 | if (!in) { | ||
1138 | in = ceph_get_inode(sb, vino); | ||
1139 | if (IS_ERR(in)) { | ||
1140 | pr_err("fill_trace bad get_inode " | ||
1141 | "%llx.%llx\n", vino.ino, vino.snap); | ||
1142 | err = PTR_ERR(in); | ||
1143 | d_drop(dn); | ||
1144 | goto done; | ||
1145 | } | ||
1146 | dn = splice_dentry(dn, in, &have_lease, true); | 1156 | dn = splice_dentry(dn, in, &have_lease, true); |
1147 | if (IS_ERR(dn)) { | 1157 | if (IS_ERR(dn)) { |
1148 | err = PTR_ERR(dn); | 1158 | err = PTR_ERR(dn); |
1149 | goto done; | 1159 | goto done; |
1150 | } | 1160 | } |
1151 | req->r_dentry = dn; /* may have spliced */ | 1161 | req->r_dentry = dn; /* may have spliced */ |
1152 | ihold(in); | 1162 | } else if (dn->d_inode && dn->d_inode != in) { |
1153 | } else if (ceph_ino(in) == vino.ino && | ||
1154 | ceph_snap(in) == vino.snap) { | ||
1155 | ihold(in); | ||
1156 | } else { | ||
1157 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", | 1163 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
1158 | dn, in, ceph_ino(in), ceph_snap(in), | 1164 | dn, dn->d_inode, ceph_vinop(dn->d_inode), |
1159 | vino.ino, vino.snap); | 1165 | ceph_vinop(in)); |
1160 | have_lease = false; | 1166 | have_lease = false; |
1161 | in = NULL; | ||
1162 | } | 1167 | } |
1163 | 1168 | ||
1164 | if (have_lease) | 1169 | if (have_lease) |
1165 | update_dentry_lease(dn, rinfo->dlease, session, | 1170 | update_dentry_lease(dn, rinfo->dlease, session, |
1166 | req->r_request_started); | 1171 | req->r_request_started); |
1167 | dout(" final dn %p\n", dn); | 1172 | dout(" final dn %p\n", dn); |
1168 | i++; | 1173 | } else if (!req->r_aborted && |
1169 | } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || | 1174 | (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || |
1170 | req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) { | 1175 | req->r_op == CEPH_MDS_OP_MKSNAP)) { |
1171 | struct dentry *dn = req->r_dentry; | 1176 | struct dentry *dn = req->r_dentry; |
1172 | 1177 | ||
1173 | /* fill out a snapdir LOOKUPSNAP dentry */ | 1178 | /* fill out a snapdir LOOKUPSNAP dentry */ |
@@ -1177,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1177 | ininfo = rinfo->targeti.in; | 1182 | ininfo = rinfo->targeti.in; |
1178 | vino.ino = le64_to_cpu(ininfo->ino); | 1183 | vino.ino = le64_to_cpu(ininfo->ino); |
1179 | vino.snap = le64_to_cpu(ininfo->snapid); | 1184 | vino.snap = le64_to_cpu(ininfo->snapid); |
1180 | in = ceph_get_inode(sb, vino); | ||
1181 | if (IS_ERR(in)) { | ||
1182 | pr_err("fill_inode get_inode badness %llx.%llx\n", | ||
1183 | vino.ino, vino.snap); | ||
1184 | err = PTR_ERR(in); | ||
1185 | d_delete(dn); | ||
1186 | goto done; | ||
1187 | } | ||
1188 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1185 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1186 | ihold(in); | ||
1189 | dn = splice_dentry(dn, in, NULL, true); | 1187 | dn = splice_dentry(dn, in, NULL, true); |
1190 | if (IS_ERR(dn)) { | 1188 | if (IS_ERR(dn)) { |
1191 | err = PTR_ERR(dn); | 1189 | err = PTR_ERR(dn); |
1192 | goto done; | 1190 | goto done; |
1193 | } | 1191 | } |
1194 | req->r_dentry = dn; /* may have spliced */ | 1192 | req->r_dentry = dn; /* may have spliced */ |
1195 | ihold(in); | ||
1196 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | ||
1197 | } | ||
1198 | |||
1199 | if (rinfo->head->is_target) { | ||
1200 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1201 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1202 | |||
1203 | if (in == NULL || ceph_ino(in) != vino.ino || | ||
1204 | ceph_snap(in) != vino.snap) { | ||
1205 | in = ceph_get_inode(sb, vino); | ||
1206 | if (IS_ERR(in)) { | ||
1207 | err = PTR_ERR(in); | ||
1208 | goto done; | ||
1209 | } | ||
1210 | } | ||
1211 | req->r_target_inode = in; | ||
1212 | |||
1213 | err = fill_inode(in, | ||
1214 | &rinfo->targeti, NULL, | ||
1215 | session, req->r_request_started, | ||
1216 | (le32_to_cpu(rinfo->head->result) == 0) ? | ||
1217 | req->r_fmode : -1, | ||
1218 | &req->r_caps_reservation); | ||
1219 | if (err < 0) { | ||
1220 | pr_err("fill_inode badness %p %llx.%llx\n", | ||
1221 | in, ceph_vinop(in)); | ||
1222 | goto done; | ||
1223 | } | ||
1224 | } | 1193 | } |
1225 | |||
1226 | done: | 1194 | done: |
1227 | dout("fill_trace done err=%d\n", err); | 1195 | dout("fill_trace done err=%d\n", err); |
1228 | return err; | 1196 | return err; |
@@ -1272,7 +1240,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
1272 | struct qstr dname; | 1240 | struct qstr dname; |
1273 | struct dentry *dn; | 1241 | struct dentry *dn; |
1274 | struct inode *in; | 1242 | struct inode *in; |
1275 | int err = 0, i; | 1243 | int err = 0, ret, i; |
1276 | struct inode *snapdir = NULL; | 1244 | struct inode *snapdir = NULL; |
1277 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; | 1245 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; |
1278 | struct ceph_dentry_info *di; | 1246 | struct ceph_dentry_info *di; |
@@ -1305,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
1305 | ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); | 1273 | ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir); |
1306 | } | 1274 | } |
1307 | 1275 | ||
1276 | /* FIXME: release caps/leases if error occurs */ | ||
1308 | for (i = 0; i < rinfo->dir_nr; i++) { | 1277 | for (i = 0; i < rinfo->dir_nr; i++) { |
1309 | struct ceph_vino vino; | 1278 | struct ceph_vino vino; |
1310 | 1279 | ||
@@ -1329,9 +1298,10 @@ retry_lookup: | |||
1329 | err = -ENOMEM; | 1298 | err = -ENOMEM; |
1330 | goto out; | 1299 | goto out; |
1331 | } | 1300 | } |
1332 | err = ceph_init_dentry(dn); | 1301 | ret = ceph_init_dentry(dn); |
1333 | if (err < 0) { | 1302 | if (ret < 0) { |
1334 | dput(dn); | 1303 | dput(dn); |
1304 | err = ret; | ||
1335 | goto out; | 1305 | goto out; |
1336 | } | 1306 | } |
1337 | } else if (dn->d_inode && | 1307 | } else if (dn->d_inode && |
@@ -1351,9 +1321,6 @@ retry_lookup: | |||
1351 | spin_unlock(&parent->d_lock); | 1321 | spin_unlock(&parent->d_lock); |
1352 | } | 1322 | } |
1353 | 1323 | ||
1354 | di = dn->d_fsdata; | ||
1355 | di->offset = ceph_make_fpos(frag, i + r_readdir_offset); | ||
1356 | |||
1357 | /* inode */ | 1324 | /* inode */ |
1358 | if (dn->d_inode) { | 1325 | if (dn->d_inode) { |
1359 | in = dn->d_inode; | 1326 | in = dn->d_inode; |
@@ -1366,26 +1333,39 @@ retry_lookup: | |||
1366 | err = PTR_ERR(in); | 1333 | err = PTR_ERR(in); |
1367 | goto out; | 1334 | goto out; |
1368 | } | 1335 | } |
1369 | dn = splice_dentry(dn, in, NULL, false); | ||
1370 | if (IS_ERR(dn)) | ||
1371 | dn = NULL; | ||
1372 | } | 1336 | } |
1373 | 1337 | ||
1374 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, | 1338 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, |
1375 | req->r_request_started, -1, | 1339 | req->r_request_started, -1, |
1376 | &req->r_caps_reservation) < 0) { | 1340 | &req->r_caps_reservation) < 0) { |
1377 | pr_err("fill_inode badness on %p\n", in); | 1341 | pr_err("fill_inode badness on %p\n", in); |
1342 | if (!dn->d_inode) | ||
1343 | iput(in); | ||
1344 | d_drop(dn); | ||
1378 | goto next_item; | 1345 | goto next_item; |
1379 | } | 1346 | } |
1380 | if (dn) | 1347 | |
1381 | update_dentry_lease(dn, rinfo->dir_dlease[i], | 1348 | if (!dn->d_inode) { |
1382 | req->r_session, | 1349 | dn = splice_dentry(dn, in, NULL, false); |
1383 | req->r_request_started); | 1350 | if (IS_ERR(dn)) { |
1351 | err = PTR_ERR(dn); | ||
1352 | dn = NULL; | ||
1353 | goto next_item; | ||
1354 | } | ||
1355 | } | ||
1356 | |||
1357 | di = dn->d_fsdata; | ||
1358 | di->offset = ceph_make_fpos(frag, i + r_readdir_offset); | ||
1359 | |||
1360 | update_dentry_lease(dn, rinfo->dir_dlease[i], | ||
1361 | req->r_session, | ||
1362 | req->r_request_started); | ||
1384 | next_item: | 1363 | next_item: |
1385 | if (dn) | 1364 | if (dn) |
1386 | dput(dn); | 1365 | dput(dn); |
1387 | } | 1366 | } |
1388 | req->r_did_prepopulate = true; | 1367 | if (err == 0) |
1368 | req->r_did_prepopulate = true; | ||
1389 | 1369 | ||
1390 | out: | 1370 | out: |
1391 | if (snapdir) { | 1371 | if (snapdir) { |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index aa3397620342..2c29db6a247e 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, | |||
477 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); | 477 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); |
478 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); | 478 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); |
479 | extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); | 479 | extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); |
480 | extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 480 | extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, |
481 | const unsigned char *path, | 481 | struct cifs_sb_info *cifs_sb, |
482 | struct cifs_sb_info *cifs_sb, unsigned int xid); | 482 | struct cifs_fattr *fattr, |
483 | const unsigned char *path); | ||
483 | extern int mdfour(unsigned char *, unsigned char *, int); | 484 | extern int mdfour(unsigned char *, unsigned char *, int); |
484 | extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, | 485 | extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, |
485 | const struct nls_table *codepage); | 486 | const struct nls_table *codepage); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 124aa0230c1b..d707edb6b852 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -4010,7 +4010,7 @@ QFileInfoRetry: | |||
4010 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4010 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4011 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4011 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4012 | if (rc) { | 4012 | if (rc) { |
4013 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4013 | cifs_dbg(FYI, "Send error in QFileInfo = %d", rc); |
4014 | } else { /* decode response */ | 4014 | } else { /* decode response */ |
4015 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4015 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4016 | 4016 | ||
@@ -4179,7 +4179,7 @@ UnixQFileInfoRetry: | |||
4179 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4179 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4180 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4180 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4181 | if (rc) { | 4181 | if (rc) { |
4182 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4182 | cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc); |
4183 | } else { /* decode response */ | 4183 | } else { /* decode response */ |
4184 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4184 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4185 | 4185 | ||
@@ -4263,7 +4263,7 @@ UnixQPathInfoRetry: | |||
4263 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4263 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4264 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4264 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
4265 | if (rc) { | 4265 | if (rc) { |
4266 | cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); | 4266 | cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc); |
4267 | } else { /* decode response */ | 4267 | } else { /* decode response */ |
4268 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 4268 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
4269 | 4269 | ||
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11ff5f116b20..a514e0a65f69 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry) | |||
193 | static int | 193 | static int |
194 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | 194 | cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, |
195 | struct tcon_link *tlink, unsigned oflags, umode_t mode, | 195 | struct tcon_link *tlink, unsigned oflags, umode_t mode, |
196 | __u32 *oplock, struct cifs_fid *fid, int *created) | 196 | __u32 *oplock, struct cifs_fid *fid) |
197 | { | 197 | { |
198 | int rc = -ENOENT; | 198 | int rc = -ENOENT; |
199 | int create_options = CREATE_NOT_DIR; | 199 | int create_options = CREATE_NOT_DIR; |
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
349 | .device = 0, | 349 | .device = 0, |
350 | }; | 350 | }; |
351 | 351 | ||
352 | *created |= FILE_CREATED; | ||
353 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { | 352 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { |
354 | args.uid = current_fsuid(); | 353 | args.uid = current_fsuid(); |
355 | if (inode->i_mode & S_ISGID) | 354 | if (inode->i_mode & S_ISGID) |
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, | |||
480 | cifs_add_pending_open(&fid, tlink, &open); | 479 | cifs_add_pending_open(&fid, tlink, &open); |
481 | 480 | ||
482 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 481 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
483 | &oplock, &fid, opened); | 482 | &oplock, &fid); |
484 | 483 | ||
485 | if (rc) { | 484 | if (rc) { |
486 | cifs_del_pending_open(&open); | 485 | cifs_del_pending_open(&open); |
487 | goto out; | 486 | goto out; |
488 | } | 487 | } |
489 | 488 | ||
489 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) | ||
490 | *opened |= FILE_CREATED; | ||
491 | |||
490 | rc = finish_open(file, direntry, generic_file_open, opened); | 492 | rc = finish_open(file, direntry, generic_file_open, opened); |
491 | if (rc) { | 493 | if (rc) { |
492 | if (server->ops->close) | 494 | if (server->ops->close) |
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
529 | struct TCP_Server_Info *server; | 531 | struct TCP_Server_Info *server; |
530 | struct cifs_fid fid; | 532 | struct cifs_fid fid; |
531 | __u32 oplock; | 533 | __u32 oplock; |
532 | int created = FILE_CREATED; | ||
533 | 534 | ||
534 | cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", | 535 | cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", |
535 | inode, direntry->d_name.name, direntry); | 536 | inode, direntry->d_name.name, direntry); |
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, | |||
546 | server->ops->new_lease_key(&fid); | 547 | server->ops->new_lease_key(&fid); |
547 | 548 | ||
548 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, | 549 | rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, |
549 | &oplock, &fid, &created); | 550 | &oplock, &fid); |
550 | if (!rc && server->ops->close) | 551 | if (!rc && server->ops->close) |
551 | server->ops->close(xid, tcon, &fid); | 552 | server->ops->close(xid, tcon, &fid); |
552 | 553 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 36f9ebb93ceb..49719b8228e5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
383 | 383 | ||
384 | /* check for Minshall+French symlinks */ | 384 | /* check for Minshall+French symlinks */ |
385 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | 385 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { |
386 | int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | 386 | int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, |
387 | full_path); | ||
387 | if (tmprc) | 388 | if (tmprc) |
388 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); | 389 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); |
389 | } | 390 | } |
@@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
799 | 800 | ||
800 | /* check for Minshall+French symlinks */ | 801 | /* check for Minshall+French symlinks */ |
801 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | 802 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { |
802 | tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | 803 | tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr, |
804 | full_path); | ||
803 | if (tmprc) | 805 | if (tmprc) |
804 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); | 806 | cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); |
805 | } | 807 | } |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cc0234710ddb..92aee08483a5 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, | |||
354 | 354 | ||
355 | 355 | ||
356 | int | 356 | int |
357 | CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 357 | CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon, |
358 | const unsigned char *path, | 358 | struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, |
359 | struct cifs_sb_info *cifs_sb, unsigned int xid) | 359 | const unsigned char *path) |
360 | { | 360 | { |
361 | int rc = 0; | 361 | int rc; |
362 | u8 *buf = NULL; | 362 | u8 *buf = NULL; |
363 | unsigned int link_len = 0; | 363 | unsigned int link_len = 0; |
364 | unsigned int bytes_read = 0; | 364 | unsigned int bytes_read = 0; |
365 | struct cifs_tcon *ptcon; | ||
366 | 365 | ||
367 | if (!CIFSCouldBeMFSymlink(fattr)) | 366 | if (!CIFSCouldBeMFSymlink(fattr)) |
368 | /* it's not a symlink */ | 367 | /* it's not a symlink */ |
369 | return 0; | 368 | return 0; |
370 | 369 | ||
371 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | 370 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); |
372 | if (!buf) { | 371 | if (!buf) |
373 | rc = -ENOMEM; | 372 | return -ENOMEM; |
374 | goto out; | ||
375 | } | ||
376 | 373 | ||
377 | ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); | 374 | if (tcon->ses->server->ops->query_mf_symlink) |
378 | if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) | 375 | rc = tcon->ses->server->ops->query_mf_symlink(path, buf, |
379 | rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, | 376 | &bytes_read, cifs_sb, xid); |
380 | &bytes_read, cifs_sb, xid); | ||
381 | else | 377 | else |
382 | goto out; | 378 | rc = -ENOSYS; |
383 | 379 | ||
384 | if (rc != 0) | 380 | if (rc) |
385 | goto out; | 381 | goto out; |
386 | 382 | ||
387 | if (bytes_read == 0) /* not a symlink */ | 383 | if (bytes_read == 0) /* not a symlink */ |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8b5e2584c840..af903128891c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1907,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1907 | } | 1907 | } |
1908 | } | 1908 | } |
1909 | } | 1909 | } |
1910 | if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) { | ||
1911 | tep = tf.file->private_data; | ||
1912 | mutex_lock_nested(&tep->mtx, 1); | ||
1913 | } | ||
1914 | 1910 | ||
1915 | /* | 1911 | /* |
1916 | * Try to lookup the file inside our RB tree, Since we grabbed "mtx" | 1912 | * Try to lookup the file inside our RB tree, Since we grabbed "mtx" |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..20d6697bd638 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, | |||
1493 | sb->s_blocksize - offset : towrite; | 1493 | sb->s_blocksize - offset : towrite; |
1494 | 1494 | ||
1495 | tmp_bh.b_state = 0; | 1495 | tmp_bh.b_state = 0; |
1496 | tmp_bh.b_size = sb->s_blocksize; | ||
1496 | err = ext2_get_block(inode, blk, &tmp_bh, 1); | 1497 | err = ext2_get_block(inode, blk, &tmp_bh, 1); |
1497 | if (err < 0) | 1498 | if (err < 0) |
1498 | goto out; | 1499 | goto out; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e6185031c1cc..ece55565b9cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -268,6 +268,16 @@ struct ext4_io_submit { | |||
268 | /* Translate # of blks to # of clusters */ | 268 | /* Translate # of blks to # of clusters */ |
269 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ | 269 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ |
270 | (sbi)->s_cluster_bits) | 270 | (sbi)->s_cluster_bits) |
271 | /* Mask out the low bits to get the starting block of the cluster */ | ||
272 | #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ | ||
273 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | ||
274 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ | ||
275 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | ||
276 | /* Get the cluster offset */ | ||
277 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ | ||
278 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | ||
279 | #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ | ||
280 | ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | ||
271 | 281 | ||
272 | /* | 282 | /* |
273 | * Structure of a blocks group descriptor | 283 | * Structure of a blocks group descriptor |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 17ac112ab101..3fe29de832c8 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
259 | if (WARN_ON_ONCE(err)) { | 259 | if (WARN_ON_ONCE(err)) { |
260 | ext4_journal_abort_handle(where, line, __func__, bh, | 260 | ext4_journal_abort_handle(where, line, __func__, bh, |
261 | handle, err); | 261 | handle, err); |
262 | ext4_error_inode(inode, where, line, | ||
263 | bh->b_blocknr, | ||
264 | "journal_dirty_metadata failed: " | ||
265 | "handle type %u started at line %u, " | ||
266 | "credits %u/%u, errcode %d", | ||
267 | handle->h_type, | ||
268 | handle->h_line_no, | ||
269 | handle->h_requested_credits, | ||
270 | handle->h_buffer_credits, err); | ||
262 | } | 271 | } |
263 | } else { | 272 | } else { |
264 | if (inode) | 273 | if (inode) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 35f65cf4f318..3384dc4bed40 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
360 | { | 360 | { |
361 | ext4_fsblk_t block = ext4_ext_pblock(ext); | 361 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
362 | int len = ext4_ext_get_actual_len(ext); | 362 | int len = ext4_ext_get_actual_len(ext); |
363 | ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); | ||
364 | ext4_lblk_t last = lblock + len - 1; | ||
363 | 365 | ||
364 | if (len == 0) | 366 | if (lblock > last) |
365 | return 0; | 367 | return 0; |
366 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
367 | } | 369 | } |
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
387 | if (depth == 0) { | 389 | if (depth == 0) { |
388 | /* leaf entries */ | 390 | /* leaf entries */ |
389 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); | 391 | struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); |
392 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
393 | ext4_fsblk_t pblock = 0; | ||
394 | ext4_lblk_t lblock = 0; | ||
395 | ext4_lblk_t prev = 0; | ||
396 | int len = 0; | ||
390 | while (entries) { | 397 | while (entries) { |
391 | if (!ext4_valid_extent(inode, ext)) | 398 | if (!ext4_valid_extent(inode, ext)) |
392 | return 0; | 399 | return 0; |
400 | |||
401 | /* Check for overlapping extents */ | ||
402 | lblock = le32_to_cpu(ext->ee_block); | ||
403 | len = ext4_ext_get_actual_len(ext); | ||
404 | if ((lblock <= prev) && prev) { | ||
405 | pblock = ext4_ext_pblock(ext); | ||
406 | es->s_last_error_block = cpu_to_le64(pblock); | ||
407 | return 0; | ||
408 | } | ||
393 | ext++; | 409 | ext++; |
394 | entries--; | 410 | entries--; |
411 | prev = lblock + len - 1; | ||
395 | } | 412 | } |
396 | } else { | 413 | } else { |
397 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); | 414 | struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); |
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, | |||
1834 | depth = ext_depth(inode); | 1851 | depth = ext_depth(inode); |
1835 | if (!path[depth].p_ext) | 1852 | if (!path[depth].p_ext) |
1836 | goto out; | 1853 | goto out; |
1837 | b2 = le32_to_cpu(path[depth].p_ext->ee_block); | 1854 | b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); |
1838 | b2 &= ~(sbi->s_cluster_ratio - 1); | ||
1839 | 1855 | ||
1840 | /* | 1856 | /* |
1841 | * get the next allocated block if the extent in the path | 1857 | * get the next allocated block if the extent in the path |
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, | |||
1845 | b2 = ext4_ext_next_allocated_block(path); | 1861 | b2 = ext4_ext_next_allocated_block(path); |
1846 | if (b2 == EXT_MAX_BLOCKS) | 1862 | if (b2 == EXT_MAX_BLOCKS) |
1847 | goto out; | 1863 | goto out; |
1848 | b2 &= ~(sbi->s_cluster_ratio - 1); | 1864 | b2 = EXT4_LBLK_CMASK(sbi, b2); |
1849 | } | 1865 | } |
1850 | 1866 | ||
1851 | /* check for wrap through zero on extent logical start block*/ | 1867 | /* check for wrap through zero on extent logical start block*/ |
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2504 | * extent, we have to mark the cluster as used (store negative | 2520 | * extent, we have to mark the cluster as used (store negative |
2505 | * cluster number in partial_cluster). | 2521 | * cluster number in partial_cluster). |
2506 | */ | 2522 | */ |
2507 | unaligned = pblk & (sbi->s_cluster_ratio - 1); | 2523 | unaligned = EXT4_PBLK_COFF(sbi, pblk); |
2508 | if (unaligned && (ee_len == num) && | 2524 | if (unaligned && (ee_len == num) && |
2509 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | 2525 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) |
2510 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2526 | *partial_cluster = EXT4_B2C(sbi, pblk); |
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2598 | * accidentally freeing it later on | 2614 | * accidentally freeing it later on |
2599 | */ | 2615 | */ |
2600 | pblk = ext4_ext_pblock(ex); | 2616 | pblk = ext4_ext_pblock(ex); |
2601 | if (pblk & (sbi->s_cluster_ratio - 1)) | 2617 | if (EXT4_PBLK_COFF(sbi, pblk)) |
2602 | *partial_cluster = | 2618 | *partial_cluster = |
2603 | -((long long)EXT4_B2C(sbi, pblk)); | 2619 | -((long long)EXT4_B2C(sbi, pblk)); |
2604 | ex--; | 2620 | ex--; |
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) | |||
3753 | { | 3769 | { |
3754 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 3770 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
3755 | ext4_lblk_t lblk_start, lblk_end; | 3771 | ext4_lblk_t lblk_start, lblk_end; |
3756 | lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); | 3772 | lblk_start = EXT4_LBLK_CMASK(sbi, lblk); |
3757 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; | 3773 | lblk_end = lblk_start + sbi->s_cluster_ratio - 1; |
3758 | 3774 | ||
3759 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end); | 3775 | return ext4_find_delalloc_range(inode, lblk_start, lblk_end); |
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3812 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); | 3828 | trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); |
3813 | 3829 | ||
3814 | /* Check towards left side */ | 3830 | /* Check towards left side */ |
3815 | c_offset = lblk_start & (sbi->s_cluster_ratio - 1); | 3831 | c_offset = EXT4_LBLK_COFF(sbi, lblk_start); |
3816 | if (c_offset) { | 3832 | if (c_offset) { |
3817 | lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); | 3833 | lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); |
3818 | lblk_to = lblk_from + c_offset - 1; | 3834 | lblk_to = lblk_from + c_offset - 1; |
3819 | 3835 | ||
3820 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) | 3836 | if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) |
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3822 | } | 3838 | } |
3823 | 3839 | ||
3824 | /* Now check towards right. */ | 3840 | /* Now check towards right. */ |
3825 | c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); | 3841 | c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); |
3826 | if (allocated_clusters && c_offset) { | 3842 | if (allocated_clusters && c_offset) { |
3827 | lblk_from = lblk_start + num_blks; | 3843 | lblk_from = lblk_start + num_blks; |
3828 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; | 3844 | lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; |
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
4030 | struct ext4_ext_path *path) | 4046 | struct ext4_ext_path *path) |
4031 | { | 4047 | { |
4032 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4048 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4033 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 4049 | ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4034 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | 4050 | ext4_lblk_t ex_cluster_start, ex_cluster_end; |
4035 | ext4_lblk_t rr_cluster_start; | 4051 | ext4_lblk_t rr_cluster_start; |
4036 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 4052 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
4048 | (rr_cluster_start == ex_cluster_start)) { | 4064 | (rr_cluster_start == ex_cluster_start)) { |
4049 | if (rr_cluster_start == ex_cluster_end) | 4065 | if (rr_cluster_start == ex_cluster_end) |
4050 | ee_start += ee_len - 1; | 4066 | ee_start += ee_len - 1; |
4051 | map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + | 4067 | map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; |
4052 | c_offset; | ||
4053 | map->m_len = min(map->m_len, | 4068 | map->m_len = min(map->m_len, |
4054 | (unsigned) sbi->s_cluster_ratio - c_offset); | 4069 | (unsigned) sbi->s_cluster_ratio - c_offset); |
4055 | /* | 4070 | /* |
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4203 | */ | 4218 | */ |
4204 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | 4219 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; |
4205 | newex.ee_block = cpu_to_le32(map->m_lblk); | 4220 | newex.ee_block = cpu_to_le32(map->m_lblk); |
4206 | cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 4221 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4207 | 4222 | ||
4208 | /* | 4223 | /* |
4209 | * If we are doing bigalloc, check to see if the extent returned | 4224 | * If we are doing bigalloc, check to see if the extent returned |
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4271 | * needed so that future calls to get_implied_cluster_alloc() | 4286 | * needed so that future calls to get_implied_cluster_alloc() |
4272 | * work correctly. | 4287 | * work correctly. |
4273 | */ | 4288 | */ |
4274 | offset = map->m_lblk & (sbi->s_cluster_ratio - 1); | 4289 | offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4275 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); | 4290 | ar.len = EXT4_NUM_B2C(sbi, offset+allocated); |
4276 | ar.goal -= offset; | 4291 | ar.goal -= offset; |
4277 | ar.logical -= offset; | 4292 | ar.logical -= offset; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 075763474118..61d49ff22c81 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file, | |||
1206 | */ | 1206 | */ |
1207 | static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | 1207 | static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) |
1208 | { | 1208 | { |
1209 | int retries = 0; | ||
1210 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1209 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1211 | struct ext4_inode_info *ei = EXT4_I(inode); | 1210 | struct ext4_inode_info *ei = EXT4_I(inode); |
1212 | unsigned int md_needed; | 1211 | unsigned int md_needed; |
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | |||
1218 | * in order to allocate nrblocks | 1217 | * in order to allocate nrblocks |
1219 | * worse case is one extent per block | 1218 | * worse case is one extent per block |
1220 | */ | 1219 | */ |
1221 | repeat: | ||
1222 | spin_lock(&ei->i_block_reservation_lock); | 1220 | spin_lock(&ei->i_block_reservation_lock); |
1223 | /* | 1221 | /* |
1224 | * ext4_calc_metadata_amount() has side effects, which we have | 1222 | * ext4_calc_metadata_amount() has side effects, which we have |
@@ -1238,10 +1236,6 @@ repeat: | |||
1238 | ei->i_da_metadata_calc_len = save_len; | 1236 | ei->i_da_metadata_calc_len = save_len; |
1239 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | 1237 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
1240 | spin_unlock(&ei->i_block_reservation_lock); | 1238 | spin_unlock(&ei->i_block_reservation_lock); |
1241 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1242 | cond_resched(); | ||
1243 | goto repeat; | ||
1244 | } | ||
1245 | return -ENOSPC; | 1239 | return -ENOSPC; |
1246 | } | 1240 | } |
1247 | ei->i_reserved_meta_blocks += md_needed; | 1241 | ei->i_reserved_meta_blocks += md_needed; |
@@ -1255,7 +1249,6 @@ repeat: | |||
1255 | */ | 1249 | */ |
1256 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1250 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1257 | { | 1251 | { |
1258 | int retries = 0; | ||
1259 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1252 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1260 | struct ext4_inode_info *ei = EXT4_I(inode); | 1253 | struct ext4_inode_info *ei = EXT4_I(inode); |
1261 | unsigned int md_needed; | 1254 | unsigned int md_needed; |
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
1277 | * in order to allocate nrblocks | 1270 | * in order to allocate nrblocks |
1278 | * worse case is one extent per block | 1271 | * worse case is one extent per block |
1279 | */ | 1272 | */ |
1280 | repeat: | ||
1281 | spin_lock(&ei->i_block_reservation_lock); | 1273 | spin_lock(&ei->i_block_reservation_lock); |
1282 | /* | 1274 | /* |
1283 | * ext4_calc_metadata_amount() has side effects, which we have | 1275 | * ext4_calc_metadata_amount() has side effects, which we have |
@@ -1297,10 +1289,6 @@ repeat: | |||
1297 | ei->i_da_metadata_calc_len = save_len; | 1289 | ei->i_da_metadata_calc_len = save_len; |
1298 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | 1290 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
1299 | spin_unlock(&ei->i_block_reservation_lock); | 1291 | spin_unlock(&ei->i_block_reservation_lock); |
1300 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1301 | cond_resched(); | ||
1302 | goto repeat; | ||
1303 | } | ||
1304 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | 1292 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1305 | return -ENOSPC; | 1293 | return -ENOSPC; |
1306 | } | 1294 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4d113efa024c..04a5c7504be9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) | |||
3442 | { | 3442 | { |
3443 | struct ext4_prealloc_space *pa; | 3443 | struct ext4_prealloc_space *pa; |
3444 | pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); | 3444 | pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); |
3445 | |||
3446 | BUG_ON(atomic_read(&pa->pa_count)); | ||
3447 | BUG_ON(pa->pa_deleted == 0); | ||
3445 | kmem_cache_free(ext4_pspace_cachep, pa); | 3448 | kmem_cache_free(ext4_pspace_cachep, pa); |
3446 | } | 3449 | } |
3447 | 3450 | ||
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | |||
3455 | ext4_group_t grp; | 3458 | ext4_group_t grp; |
3456 | ext4_fsblk_t grp_blk; | 3459 | ext4_fsblk_t grp_blk; |
3457 | 3460 | ||
3458 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) | ||
3459 | return; | ||
3460 | |||
3461 | /* in this short window concurrent discard can set pa_deleted */ | 3461 | /* in this short window concurrent discard can set pa_deleted */ |
3462 | spin_lock(&pa->pa_lock); | 3462 | spin_lock(&pa->pa_lock); |
3463 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { | ||
3464 | spin_unlock(&pa->pa_lock); | ||
3465 | return; | ||
3466 | } | ||
3467 | |||
3463 | if (pa->pa_deleted == 1) { | 3468 | if (pa->pa_deleted == 1) { |
3464 | spin_unlock(&pa->pa_lock); | 3469 | spin_unlock(&pa->pa_lock); |
3465 | return; | 3470 | return; |
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4121 | ext4_get_group_no_and_offset(sb, goal, &group, &block); | 4126 | ext4_get_group_no_and_offset(sb, goal, &group, &block); |
4122 | 4127 | ||
4123 | /* set up allocation goals */ | 4128 | /* set up allocation goals */ |
4124 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); | 4129 | ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); |
4125 | ac->ac_status = AC_STATUS_CONTINUE; | 4130 | ac->ac_status = AC_STATUS_CONTINUE; |
4126 | ac->ac_sb = sb; | 4131 | ac->ac_sb = sb; |
4127 | ac->ac_inode = ar->inode; | 4132 | ac->ac_inode = ar->inode; |
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4663 | * blocks at the beginning or the end unless we are explicitly | 4668 | * blocks at the beginning or the end unless we are explicitly |
4664 | * requested to avoid doing so. | 4669 | * requested to avoid doing so. |
4665 | */ | 4670 | */ |
4666 | overflow = block & (sbi->s_cluster_ratio - 1); | 4671 | overflow = EXT4_PBLK_COFF(sbi, block); |
4667 | if (overflow) { | 4672 | if (overflow) { |
4668 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { | 4673 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { |
4669 | overflow = sbi->s_cluster_ratio - overflow; | 4674 | overflow = sbi->s_cluster_ratio - overflow; |
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4677 | count += overflow; | 4682 | count += overflow; |
4678 | } | 4683 | } |
4679 | } | 4684 | } |
4680 | overflow = count & (sbi->s_cluster_ratio - 1); | 4685 | overflow = EXT4_LBLK_COFF(sbi, count); |
4681 | if (overflow) { | 4686 | if (overflow) { |
4682 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { | 4687 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { |
4683 | if (count > overflow) | 4688 | if (count > overflow) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c977f4e4e63b..1f7784de05b6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb) | |||
792 | } | 792 | } |
793 | 793 | ||
794 | ext4_es_unregister_shrinker(sbi); | 794 | ext4_es_unregister_shrinker(sbi); |
795 | del_timer(&sbi->s_err_report); | 795 | del_timer_sync(&sbi->s_err_report); |
796 | ext4_release_system_zone(sb); | 796 | ext4_release_system_zone(sb); |
797 | ext4_mb_release(sb); | 797 | ext4_mb_release(sb); |
798 | ext4_ext_release(sb); | 798 | ext4_ext_release(sb); |
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3316 | } | 3316 | } |
3317 | 3317 | ||
3318 | 3318 | ||
3319 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) | 3319 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) |
3320 | { | 3320 | { |
3321 | ext4_fsblk_t resv_clusters; | 3321 | ext4_fsblk_t resv_clusters; |
3322 | 3322 | ||
3323 | /* | 3323 | /* |
3324 | * There's no need to reserve anything when we aren't using extents. | ||
3325 | * The space estimates are exact, there are no unwritten extents, | ||
3326 | * hole punching doesn't need new metadata... This is needed especially | ||
3327 | * to keep ext2/3 backward compatibility. | ||
3328 | */ | ||
3329 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) | ||
3330 | return 0; | ||
3331 | /* | ||
3324 | * By default we reserve 2% or 4096 clusters, whichever is smaller. | 3332 | * By default we reserve 2% or 4096 clusters, whichever is smaller. |
3325 | * This should cover the situations where we can not afford to run | 3333 | * This should cover the situations where we can not afford to run |
3326 | * out of space like for example punch hole, or converting | 3334 | * out of space like for example punch hole, or converting |
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) | |||
3328 | * allocation would require 1, or 2 blocks, higher numbers are | 3336 | * allocation would require 1, or 2 blocks, higher numbers are |
3329 | * very rare. | 3337 | * very rare. |
3330 | */ | 3338 | */ |
3331 | resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; | 3339 | resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> |
3340 | EXT4_SB(sb)->s_cluster_bits; | ||
3332 | 3341 | ||
3333 | do_div(resv_clusters, 50); | 3342 | do_div(resv_clusters, 50); |
3334 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); | 3343 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); |
@@ -4071,10 +4080,10 @@ no_journal: | |||
4071 | "available"); | 4080 | "available"); |
4072 | } | 4081 | } |
4073 | 4082 | ||
4074 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); | 4083 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); |
4075 | if (err) { | 4084 | if (err) { |
4076 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " | 4085 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " |
4077 | "reserved pool", ext4_calculate_resv_clusters(sbi)); | 4086 | "reserved pool", ext4_calculate_resv_clusters(sb)); |
4078 | goto failed_mount4a; | 4087 | goto failed_mount4a; |
4079 | } | 4088 | } |
4080 | 4089 | ||
@@ -4184,7 +4193,7 @@ failed_mount_wq: | |||
4184 | } | 4193 | } |
4185 | failed_mount3: | 4194 | failed_mount3: |
4186 | ext4_es_unregister_shrinker(sbi); | 4195 | ext4_es_unregister_shrinker(sbi); |
4187 | del_timer(&sbi->s_err_report); | 4196 | del_timer_sync(&sbi->s_err_report); |
4188 | if (sbi->s_flex_groups) | 4197 | if (sbi->s_flex_groups) |
4189 | ext4_kvfree(sbi->s_flex_groups); | 4198 | ext4_kvfree(sbi->s_flex_groups); |
4190 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 4199 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index b7fc035a6943..73f3e4ee4037 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
986 | { | 986 | { |
987 | struct file *file = iocb->ki_filp; | 987 | struct file *file = iocb->ki_filp; |
988 | struct inode *inode = file->f_mapping->host; | 988 | struct inode *inode = file->f_mapping->host; |
989 | struct address_space *mapping = inode->i_mapping; | ||
989 | struct gfs2_inode *ip = GFS2_I(inode); | 990 | struct gfs2_inode *ip = GFS2_I(inode); |
990 | struct gfs2_holder gh; | 991 | struct gfs2_holder gh; |
991 | int rv; | 992 | int rv; |
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1006 | if (rv != 1) | 1007 | if (rv != 1) |
1007 | goto out; /* dio not valid, fall back to buffered i/o */ | 1008 | goto out; /* dio not valid, fall back to buffered i/o */ |
1008 | 1009 | ||
1010 | /* | ||
1011 | * Now since we are holding a deferred (CW) lock at this point, you | ||
1012 | * might be wondering why this is ever needed. There is a case however | ||
1013 | * where we've granted a deferred local lock against a cached exclusive | ||
1014 | * glock. That is ok provided all granted local locks are deferred, but | ||
1015 | * it also means that it is possible to encounter pages which are | ||
1016 | * cached and possibly also mapped. So here we check for that and sort | ||
1017 | * them out ahead of the dio. The glock state machine will take care of | ||
1018 | * everything else. | ||
1019 | * | ||
1020 | * If in fact the cached glock state (gl->gl_state) is deferred (CW) in | ||
1021 | * the first place, mapping->nr_pages will always be zero. | ||
1022 | */ | ||
1023 | if (mapping->nrpages) { | ||
1024 | loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); | ||
1025 | loff_t len = iov_length(iov, nr_segs); | ||
1026 | loff_t end = PAGE_ALIGN(offset + len) - 1; | ||
1027 | |||
1028 | rv = 0; | ||
1029 | if (len == 0) | ||
1030 | goto out; | ||
1031 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | ||
1032 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); | ||
1033 | rv = filemap_write_and_wait_range(mapping, lstart, end); | ||
1034 | if (rv) | ||
1035 | return rv; | ||
1036 | truncate_inode_pages_range(mapping, lstart, end); | ||
1037 | } | ||
1038 | |||
1009 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1039 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
1010 | offset, nr_segs, gfs2_get_block_direct, | 1040 | offset, nr_segs, gfs2_get_block_direct, |
1011 | NULL, NULL, 0); | 1041 | NULL, NULL, 0); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c8420f7e4db6..6f7a47c05259 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1655 | struct task_struct *gh_owner = NULL; | 1655 | struct task_struct *gh_owner = NULL; |
1656 | char flags_buf[32]; | 1656 | char flags_buf[32]; |
1657 | 1657 | ||
1658 | rcu_read_lock(); | ||
1658 | if (gh->gh_owner_pid) | 1659 | if (gh->gh_owner_pid) |
1659 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1660 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
1660 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", | 1661 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", |
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) | |||
1664 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, | 1665 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
1665 | gh_owner ? gh_owner->comm : "(ended)", | 1666 | gh_owner ? gh_owner->comm : "(ended)", |
1666 | (void *)gh->gh_ip); | 1667 | (void *)gh->gh_ip); |
1668 | rcu_read_unlock(); | ||
1667 | return 0; | 1669 | return 0; |
1668 | } | 1670 | } |
1669 | 1671 | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index db908f697139..f88dcd925010 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
192 | 192 | ||
193 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | 193 | if (ip && !S_ISREG(ip->i_inode.i_mode)) |
194 | ip = NULL; | 194 | ip = NULL; |
195 | if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | 195 | if (ip) { |
196 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | 196 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) |
197 | unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); | ||
198 | inode_dio_wait(&ip->i_inode); | ||
199 | } | ||
197 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 200 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
198 | return; | 201 | return; |
199 | 202 | ||
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
410 | return error; | 413 | return error; |
411 | } | 414 | } |
412 | 415 | ||
416 | if (gh->gh_state != LM_ST_DEFERRED) | ||
417 | inode_dio_wait(&ip->i_inode); | ||
418 | |||
413 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && | 419 | if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && |
414 | (gl->gl_state == LM_ST_EXCLUSIVE) && | 420 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
415 | (gh->gh_state == LM_ST_EXCLUSIVE)) { | 421 | (gh->gh_state == LM_ST_EXCLUSIVE)) { |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 610613fb65b5..9dcb9777a5f8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
551 | struct buffer_head *bh = bd->bd_bh; | 551 | struct buffer_head *bh = bd->bd_bh; |
552 | struct gfs2_glock *gl = bd->bd_gl; | 552 | struct gfs2_glock *gl = bd->bd_gl; |
553 | 553 | ||
554 | gfs2_remove_from_ail(bd); | ||
555 | bd->bd_bh = NULL; | ||
556 | bh->b_private = NULL; | 554 | bh->b_private = NULL; |
557 | bd->bd_blkno = bh->b_blocknr; | 555 | bd->bd_blkno = bh->b_blocknr; |
556 | gfs2_remove_from_ail(bd); /* drops ref on bh */ | ||
557 | bd->bd_bh = NULL; | ||
558 | bd->bd_ops = &gfs2_revoke_lops; | 558 | bd->bd_ops = &gfs2_revoke_lops; |
559 | sdp->sd_log_num_revoke++; | 559 | sdp->sd_log_num_revoke++; |
560 | atomic_inc(&gl->gl_revokes); | 560 | atomic_inc(&gl->gl_revokes); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 932415050540..52f177be3bf8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
258 | struct address_space *mapping = bh->b_page->mapping; | 258 | struct address_space *mapping = bh->b_page->mapping; |
259 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); | 259 | struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); |
260 | struct gfs2_bufdata *bd = bh->b_private; | 260 | struct gfs2_bufdata *bd = bh->b_private; |
261 | int was_pinned = 0; | ||
261 | 262 | ||
262 | if (test_clear_buffer_pinned(bh)) { | 263 | if (test_clear_buffer_pinned(bh)) { |
263 | trace_gfs2_pin(bd, 0); | 264 | trace_gfs2_pin(bd, 0); |
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
273 | tr->tr_num_databuf_rm++; | 274 | tr->tr_num_databuf_rm++; |
274 | } | 275 | } |
275 | tr->tr_touched = 1; | 276 | tr->tr_touched = 1; |
277 | was_pinned = 1; | ||
276 | brelse(bh); | 278 | brelse(bh); |
277 | } | 279 | } |
278 | if (bd) { | 280 | if (bd) { |
279 | spin_lock(&sdp->sd_ail_lock); | 281 | spin_lock(&sdp->sd_ail_lock); |
280 | if (bd->bd_tr) { | 282 | if (bd->bd_tr) { |
281 | gfs2_trans_add_revoke(sdp, bd); | 283 | gfs2_trans_add_revoke(sdp, bd); |
284 | } else if (was_pinned) { | ||
285 | bh->b_private = NULL; | ||
286 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
282 | } | 287 | } |
283 | spin_unlock(&sdp->sd_ail_lock); | 288 | spin_unlock(&sdp->sd_ail_lock); |
284 | } | 289 | } |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 82303b474958..52fa88314f5c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, | |||
1366 | if (IS_ERR(s)) | 1366 | if (IS_ERR(s)) |
1367 | goto error_bdev; | 1367 | goto error_bdev; |
1368 | 1368 | ||
1369 | if (s->s_root) | 1369 | if (s->s_root) { |
1370 | /* | ||
1371 | * s_umount nests inside bd_mutex during | ||
1372 | * __invalidate_device(). blkdev_put() acquires | ||
1373 | * bd_mutex and can't be called under s_umount. Drop | ||
1374 | * s_umount temporarily. This is safe as we're | ||
1375 | * holding an active reference. | ||
1376 | */ | ||
1377 | up_write(&s->s_umount); | ||
1370 | blkdev_put(bdev, mode); | 1378 | blkdev_put(bdev, mode); |
1379 | down_write(&s->s_umount); | ||
1380 | } | ||
1371 | 1381 | ||
1372 | memset(&args, 0, sizeof(args)); | 1382 | memset(&args, 0, sizeof(args)); |
1373 | args.ar_quota = GFS2_QUOTA_DEFAULT; | 1383 | args.ar_quota = GFS2_QUOTA_DEFAULT; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 52032647dd4a..5fa344afb49a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
702 | read_lock(&journal->j_state_lock); | 702 | read_lock(&journal->j_state_lock); |
703 | #ifdef CONFIG_JBD2_DEBUG | 703 | #ifdef CONFIG_JBD2_DEBUG |
704 | if (!tid_geq(journal->j_commit_request, tid)) { | 704 | if (!tid_geq(journal->j_commit_request, tid)) { |
705 | printk(KERN_EMERG | 705 | printk(KERN_ERR |
706 | "%s: error: j_commit_request=%d, tid=%d\n", | 706 | "%s: error: j_commit_request=%d, tid=%d\n", |
707 | __func__, journal->j_commit_request, tid); | 707 | __func__, journal->j_commit_request, tid); |
708 | } | 708 | } |
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
718 | } | 718 | } |
719 | read_unlock(&journal->j_state_lock); | 719 | read_unlock(&journal->j_state_lock); |
720 | 720 | ||
721 | if (unlikely(is_journal_aborted(journal))) { | 721 | if (unlikely(is_journal_aborted(journal))) |
722 | printk(KERN_EMERG "journal commit I/O error\n"); | ||
723 | err = -EIO; | 722 | err = -EIO; |
724 | } | ||
725 | return err; | 723 | return err; |
726 | } | 724 | } |
727 | 725 | ||
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal) | |||
1527 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && | 1525 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && |
1528 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1526 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { |
1529 | /* Can't have checksum v1 and v2 on at the same time! */ | 1527 | /* Can't have checksum v1 and v2 on at the same time! */ |
1530 | printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " | 1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " |
1531 | "at the same time!\n"); | 1529 | "at the same time!\n"); |
1532 | goto out; | 1530 | goto out; |
1533 | } | 1531 | } |
1534 | 1532 | ||
1535 | if (!jbd2_verify_csum_type(journal, sb)) { | 1533 | if (!jbd2_verify_csum_type(journal, sb)) { |
1536 | printk(KERN_ERR "JBD: Unknown checksum type\n"); | 1534 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); |
1537 | goto out; | 1535 | goto out; |
1538 | } | 1536 | } |
1539 | 1537 | ||
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1541 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1539 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { |
1542 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | 1540 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); |
1543 | if (IS_ERR(journal->j_chksum_driver)) { | 1541 | if (IS_ERR(journal->j_chksum_driver)) { |
1544 | printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); | 1542 | printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); |
1545 | err = PTR_ERR(journal->j_chksum_driver); | 1543 | err = PTR_ERR(journal->j_chksum_driver); |
1546 | journal->j_chksum_driver = NULL; | 1544 | journal->j_chksum_driver = NULL; |
1547 | goto out; | 1545 | goto out; |
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1550 | 1548 | ||
1551 | /* Check superblock checksum */ | 1549 | /* Check superblock checksum */ |
1552 | if (!jbd2_superblock_csum_verify(journal, sb)) { | 1550 | if (!jbd2_superblock_csum_verify(journal, sb)) { |
1553 | printk(KERN_ERR "JBD: journal checksum error\n"); | 1551 | printk(KERN_ERR "JBD2: journal checksum error\n"); |
1554 | goto out; | 1552 | goto out; |
1555 | } | 1553 | } |
1556 | 1554 | ||
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1836 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", | 1834 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", |
1837 | 0, 0); | 1835 | 0, 0); |
1838 | if (IS_ERR(journal->j_chksum_driver)) { | 1836 | if (IS_ERR(journal->j_chksum_driver)) { |
1839 | printk(KERN_ERR "JBD: Cannot load crc32c " | 1837 | printk(KERN_ERR "JBD2: Cannot load crc32c " |
1840 | "driver.\n"); | 1838 | "driver.\n"); |
1841 | journal->j_chksum_driver = NULL; | 1839 | journal->j_chksum_driver = NULL; |
1842 | return 0; | 1840 | return 0; |
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void) | |||
2645 | #ifdef CONFIG_JBD2_DEBUG | 2643 | #ifdef CONFIG_JBD2_DEBUG |
2646 | int n = atomic_read(&nr_journal_heads); | 2644 | int n = atomic_read(&nr_journal_heads); |
2647 | if (n) | 2645 | if (n) |
2648 | printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); | 2646 | printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); |
2649 | #endif | 2647 | #endif |
2650 | jbd2_remove_jbd_stats_proc_entry(); | 2648 | jbd2_remove_jbd_stats_proc_entry(); |
2651 | jbd2_journal_destroy_caches(); | 2649 | jbd2_journal_destroy_caches(); |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3929c50428b1..3b6bb19d60b1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal, | |||
594 | be32_to_cpu(tmp->h_sequence))) { | 594 | be32_to_cpu(tmp->h_sequence))) { |
595 | brelse(obh); | 595 | brelse(obh); |
596 | success = -EIO; | 596 | success = -EIO; |
597 | printk(KERN_ERR "JBD: Invalid " | 597 | printk(KERN_ERR "JBD2: Invalid " |
598 | "checksum recovering " | 598 | "checksum recovering " |
599 | "block %llu in log\n", | 599 | "block %llu in log\n", |
600 | blocknr); | 600 | blocknr); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7aa9a32573bb..8360674c85bc 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -932,7 +932,7 @@ repeat: | |||
932 | jbd2_alloc(jh2bh(jh)->b_size, | 932 | jbd2_alloc(jh2bh(jh)->b_size, |
933 | GFP_NOFS); | 933 | GFP_NOFS); |
934 | if (!frozen_buffer) { | 934 | if (!frozen_buffer) { |
935 | printk(KERN_EMERG | 935 | printk(KERN_ERR |
936 | "%s: OOM for frozen_buffer\n", | 936 | "%s: OOM for frozen_buffer\n", |
937 | __func__); | 937 | __func__); |
938 | JBUFFER_TRACE(jh, "oom!"); | 938 | JBUFFER_TRACE(jh, "oom!"); |
@@ -1166,7 +1166,7 @@ repeat: | |||
1166 | if (!jh->b_committed_data) { | 1166 | if (!jh->b_committed_data) { |
1167 | committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); | 1167 | committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
1168 | if (!committed_data) { | 1168 | if (!committed_data) { |
1169 | printk(KERN_EMERG "%s: No memory for committed data\n", | 1169 | printk(KERN_ERR "%s: No memory for committed data\n", |
1170 | __func__); | 1170 | __func__); |
1171 | err = -ENOMEM; | 1171 | err = -ENOMEM; |
1172 | goto out; | 1172 | goto out; |
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1290 | * once a transaction -bzzz | 1290 | * once a transaction -bzzz |
1291 | */ | 1291 | */ |
1292 | jh->b_modified = 1; | 1292 | jh->b_modified = 1; |
1293 | J_ASSERT_JH(jh, handle->h_buffer_credits > 0); | 1293 | if (handle->h_buffer_credits <= 0) { |
1294 | ret = -ENOSPC; | ||
1295 | goto out_unlock_bh; | ||
1296 | } | ||
1294 | handle->h_buffer_credits--; | 1297 | handle->h_buffer_credits--; |
1295 | } | 1298 | } |
1296 | 1299 | ||
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1305 | JBUFFER_TRACE(jh, "fastpath"); | 1308 | JBUFFER_TRACE(jh, "fastpath"); |
1306 | if (unlikely(jh->b_transaction != | 1309 | if (unlikely(jh->b_transaction != |
1307 | journal->j_running_transaction)) { | 1310 | journal->j_running_transaction)) { |
1308 | printk(KERN_EMERG "JBD: %s: " | 1311 | printk(KERN_ERR "JBD2: %s: " |
1309 | "jh->b_transaction (%llu, %p, %u) != " | 1312 | "jh->b_transaction (%llu, %p, %u) != " |
1310 | "journal->j_running_transaction (%p, %u)", | 1313 | "journal->j_running_transaction (%p, %u)", |
1311 | journal->j_devname, | 1314 | journal->j_devname, |
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1332 | JBUFFER_TRACE(jh, "already on other transaction"); | 1335 | JBUFFER_TRACE(jh, "already on other transaction"); |
1333 | if (unlikely(jh->b_transaction != | 1336 | if (unlikely(jh->b_transaction != |
1334 | journal->j_committing_transaction)) { | 1337 | journal->j_committing_transaction)) { |
1335 | printk(KERN_EMERG "JBD: %s: " | 1338 | printk(KERN_ERR "JBD2: %s: " |
1336 | "jh->b_transaction (%llu, %p, %u) != " | 1339 | "jh->b_transaction (%llu, %p, %u) != " |
1337 | "journal->j_committing_transaction (%p, %u)", | 1340 | "journal->j_committing_transaction (%p, %u)", |
1338 | journal->j_devname, | 1341 | journal->j_devname, |
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1345 | ret = -EINVAL; | 1348 | ret = -EINVAL; |
1346 | } | 1349 | } |
1347 | if (unlikely(jh->b_next_transaction != transaction)) { | 1350 | if (unlikely(jh->b_next_transaction != transaction)) { |
1348 | printk(KERN_EMERG "JBD: %s: " | 1351 | printk(KERN_ERR "JBD2: %s: " |
1349 | "jh->b_next_transaction (%llu, %p, %u) != " | 1352 | "jh->b_next_transaction (%llu, %p, %u) != " |
1350 | "transaction (%p, %u)", | 1353 | "transaction (%p, %u)", |
1351 | journal->j_devname, | 1354 | journal->j_devname, |
@@ -1373,7 +1376,6 @@ out_unlock_bh: | |||
1373 | jbd2_journal_put_journal_head(jh); | 1376 | jbd2_journal_put_journal_head(jh); |
1374 | out: | 1377 | out: |
1375 | JBUFFER_TRACE(jh, "exit"); | 1378 | JBUFFER_TRACE(jh, "exit"); |
1376 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ | ||
1377 | return ret; | 1379 | return ret; |
1378 | } | 1380 | } |
1379 | 1381 | ||
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b8e93a40a5d3..78c3c2097787 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi) | |||
443 | pstore_get_records(0); | 443 | pstore_get_records(0); |
444 | 444 | ||
445 | kmsg_dump_register(&pstore_dumper); | 445 | kmsg_dump_register(&pstore_dumper); |
446 | pstore_register_console(); | 446 | |
447 | pstore_register_ftrace(); | 447 | if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { |
448 | pstore_register_console(); | ||
449 | pstore_register_ftrace(); | ||
450 | } | ||
448 | 451 | ||
449 | if (pstore_update_ms >= 0) { | 452 | if (pstore_update_ms >= 0) { |
450 | pstore_timer.expires = jiffies + | 453 | pstore_timer.expires = jiffies + |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b94f93685093..35e7d08fe629 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
609 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 609 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
610 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 610 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
611 | struct sysfs_open_file *of; | 611 | struct sysfs_open_file *of; |
612 | bool has_read, has_write, has_mmap; | 612 | bool has_read, has_write; |
613 | int error = -EACCES; | 613 | int error = -EACCES; |
614 | 614 | ||
615 | /* need attr_sd for attr and ops, its parent for kobj */ | 615 | /* need attr_sd for attr and ops, its parent for kobj */ |
@@ -621,7 +621,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
621 | 621 | ||
622 | has_read = battr->read || battr->mmap; | 622 | has_read = battr->read || battr->mmap; |
623 | has_write = battr->write || battr->mmap; | 623 | has_write = battr->write || battr->mmap; |
624 | has_mmap = battr->mmap; | ||
625 | } else { | 624 | } else { |
626 | const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); | 625 | const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); |
627 | 626 | ||
@@ -633,7 +632,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
633 | 632 | ||
634 | has_read = ops->show; | 633 | has_read = ops->show; |
635 | has_write = ops->store; | 634 | has_write = ops->store; |
636 | has_mmap = false; | ||
637 | } | 635 | } |
638 | 636 | ||
639 | /* check perms and supported operations */ | 637 | /* check perms and supported operations */ |
@@ -661,9 +659,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
661 | * open file has a separate mutex, it's okay as long as those don't | 659 | * open file has a separate mutex, it's okay as long as those don't |
662 | * happen on the same file. At this point, we can't easily give | 660 | * happen on the same file. At this point, we can't easily give |
663 | * each file a separate locking class. Let's differentiate on | 661 | * each file a separate locking class. Let's differentiate on |
664 | * whether the file has mmap or not for now. | 662 | * whether the file is bin or not for now. |
665 | */ | 663 | */ |
666 | if (has_mmap) | 664 | if (sysfs_is_bin(attr_sd)) |
667 | mutex_init(&of->mutex); | 665 | mutex_init(&of->mutex); |
668 | else | 666 | else |
669 | mutex_init(&of->mutex); | 667 | mutex_init(&of->mutex); |
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 739e0a52deda..5549d69ddb45 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c | |||
@@ -110,7 +110,7 @@ xfs_attr3_rmt_verify( | |||
110 | if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) | 110 | if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) |
111 | return false; | 111 | return false; |
112 | if (be32_to_cpu(rmt->rm_offset) + | 112 | if (be32_to_cpu(rmt->rm_offset) + |
113 | be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) | 113 | be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) |
114 | return false; | 114 | return false; |
115 | if (rmt->rm_owner == 0) | 115 | if (rmt->rm_owner == 0) |
116 | return false; | 116 | return false; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3ef11b22e750..3b2c14b6f0fb 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent( | |||
1635 | * blocks at the end of the file which do not start at the previous data block, | 1635 | * blocks at the end of the file which do not start at the previous data block, |
1636 | * we will try to align the new blocks at stripe unit boundaries. | 1636 | * we will try to align the new blocks at stripe unit boundaries. |
1637 | * | 1637 | * |
1638 | * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be | 1638 | * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be |
1639 | * at, or past the EOF. | 1639 | * at, or past the EOF. |
1640 | */ | 1640 | */ |
1641 | STATIC int | 1641 | STATIC int |
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof( | |||
1650 | bma->aeof = 0; | 1650 | bma->aeof = 0; |
1651 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, | 1651 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, |
1652 | &is_empty); | 1652 | &is_empty); |
1653 | if (error || is_empty) | 1653 | if (error) |
1654 | return error; | 1654 | return error; |
1655 | 1655 | ||
1656 | if (is_empty) { | ||
1657 | bma->aeof = 1; | ||
1658 | return 0; | ||
1659 | } | ||
1660 | |||
1656 | /* | 1661 | /* |
1657 | * Check if we are allocation or past the last extent, or at least into | 1662 | * Check if we are allocation or past the last extent, or at least into |
1658 | * the last delayed allocated extent. | 1663 | * the last delayed allocated extent. |
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc( | |||
3643 | int isaligned; | 3648 | int isaligned; |
3644 | int tryagain; | 3649 | int tryagain; |
3645 | int error; | 3650 | int error; |
3651 | int stripe_align; | ||
3646 | 3652 | ||
3647 | ASSERT(ap->length); | 3653 | ASSERT(ap->length); |
3648 | 3654 | ||
3649 | mp = ap->ip->i_mount; | 3655 | mp = ap->ip->i_mount; |
3656 | |||
3657 | /* stripe alignment for allocation is determined by mount parameters */ | ||
3658 | stripe_align = 0; | ||
3659 | if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) | ||
3660 | stripe_align = mp->m_swidth; | ||
3661 | else if (mp->m_dalign) | ||
3662 | stripe_align = mp->m_dalign; | ||
3663 | |||
3650 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; | 3664 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; |
3651 | if (unlikely(align)) { | 3665 | if (unlikely(align)) { |
3652 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, | 3666 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc( | |||
3655 | ASSERT(!error); | 3669 | ASSERT(!error); |
3656 | ASSERT(ap->length); | 3670 | ASSERT(ap->length); |
3657 | } | 3671 | } |
3672 | |||
3673 | |||
3658 | nullfb = *ap->firstblock == NULLFSBLOCK; | 3674 | nullfb = *ap->firstblock == NULLFSBLOCK; |
3659 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); | 3675 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); |
3660 | if (nullfb) { | 3676 | if (nullfb) { |
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc( | |||
3730 | */ | 3746 | */ |
3731 | if (!ap->flist->xbf_low && ap->aeof) { | 3747 | if (!ap->flist->xbf_low && ap->aeof) { |
3732 | if (!ap->offset) { | 3748 | if (!ap->offset) { |
3733 | args.alignment = mp->m_dalign; | 3749 | args.alignment = stripe_align; |
3734 | atype = args.type; | 3750 | atype = args.type; |
3735 | isaligned = 1; | 3751 | isaligned = 1; |
3736 | /* | 3752 | /* |
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc( | |||
3755 | * of minlen+alignment+slop doesn't go up | 3771 | * of minlen+alignment+slop doesn't go up |
3756 | * between the calls. | 3772 | * between the calls. |
3757 | */ | 3773 | */ |
3758 | if (blen > mp->m_dalign && blen <= args.maxlen) | 3774 | if (blen > stripe_align && blen <= args.maxlen) |
3759 | nextminlen = blen - mp->m_dalign; | 3775 | nextminlen = blen - stripe_align; |
3760 | else | 3776 | else |
3761 | nextminlen = args.minlen; | 3777 | nextminlen = args.minlen; |
3762 | if (nextminlen + mp->m_dalign > args.minlen + 1) | 3778 | if (nextminlen + stripe_align > args.minlen + 1) |
3763 | args.minalignslop = | 3779 | args.minalignslop = |
3764 | nextminlen + mp->m_dalign - | 3780 | nextminlen + stripe_align - |
3765 | args.minlen - 1; | 3781 | args.minlen - 1; |
3766 | else | 3782 | else |
3767 | args.minalignslop = 0; | 3783 | args.minalignslop = 0; |
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc( | |||
3783 | */ | 3799 | */ |
3784 | args.type = atype; | 3800 | args.type = atype; |
3785 | args.fsbno = ap->blkno; | 3801 | args.fsbno = ap->blkno; |
3786 | args.alignment = mp->m_dalign; | 3802 | args.alignment = stripe_align; |
3787 | args.minlen = nextminlen; | 3803 | args.minlen = nextminlen; |
3788 | args.minalignslop = 0; | 3804 | args.minalignslop = 0; |
3789 | isaligned = 1; | 3805 | isaligned = 1; |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c0323..82e0dab46ee5 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -287,6 +287,7 @@ xfs_bmapi_allocate( | |||
287 | INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); | 287 | INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); |
288 | queue_work(xfs_alloc_wq, &args->work); | 288 | queue_work(xfs_alloc_wq, &args->work); |
289 | wait_for_completion(&done); | 289 | wait_for_completion(&done); |
290 | destroy_work_on_stack(&args->work); | ||
290 | return args->result; | 291 | return args->result; |
291 | } | 292 | } |
292 | 293 | ||
@@ -1187,7 +1188,12 @@ xfs_zero_remaining_bytes( | |||
1187 | XFS_BUF_UNWRITE(bp); | 1188 | XFS_BUF_UNWRITE(bp); |
1188 | XFS_BUF_READ(bp); | 1189 | XFS_BUF_READ(bp); |
1189 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); | 1190 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); |
1190 | xfsbdstrat(mp, bp); | 1191 | |
1192 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1193 | error = XFS_ERROR(EIO); | ||
1194 | break; | ||
1195 | } | ||
1196 | xfs_buf_iorequest(bp); | ||
1191 | error = xfs_buf_iowait(bp); | 1197 | error = xfs_buf_iowait(bp); |
1192 | if (error) { | 1198 | if (error) { |
1193 | xfs_buf_ioerror_alert(bp, | 1199 | xfs_buf_ioerror_alert(bp, |
@@ -1200,7 +1206,12 @@ xfs_zero_remaining_bytes( | |||
1200 | XFS_BUF_UNDONE(bp); | 1206 | XFS_BUF_UNDONE(bp); |
1201 | XFS_BUF_UNREAD(bp); | 1207 | XFS_BUF_UNREAD(bp); |
1202 | XFS_BUF_WRITE(bp); | 1208 | XFS_BUF_WRITE(bp); |
1203 | xfsbdstrat(mp, bp); | 1209 | |
1210 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1211 | error = XFS_ERROR(EIO); | ||
1212 | break; | ||
1213 | } | ||
1214 | xfs_buf_iorequest(bp); | ||
1204 | error = xfs_buf_iowait(bp); | 1215 | error = xfs_buf_iowait(bp); |
1205 | if (error) { | 1216 | if (error) { |
1206 | xfs_buf_ioerror_alert(bp, | 1217 | xfs_buf_ioerror_alert(bp, |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77dcb00..afe7645e4b2b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -698,7 +698,11 @@ xfs_buf_read_uncached( | |||
698 | bp->b_flags |= XBF_READ; | 698 | bp->b_flags |= XBF_READ; |
699 | bp->b_ops = ops; | 699 | bp->b_ops = ops; |
700 | 700 | ||
701 | xfsbdstrat(target->bt_mount, bp); | 701 | if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { |
702 | xfs_buf_relse(bp); | ||
703 | return NULL; | ||
704 | } | ||
705 | xfs_buf_iorequest(bp); | ||
702 | xfs_buf_iowait(bp); | 706 | xfs_buf_iowait(bp); |
703 | return bp; | 707 | return bp; |
704 | } | 708 | } |
@@ -1089,7 +1093,7 @@ xfs_bioerror( | |||
1089 | * This is meant for userdata errors; metadata bufs come with | 1093 | * This is meant for userdata errors; metadata bufs come with |
1090 | * iodone functions attached, so that we can track down errors. | 1094 | * iodone functions attached, so that we can track down errors. |
1091 | */ | 1095 | */ |
1092 | STATIC int | 1096 | int |
1093 | xfs_bioerror_relse( | 1097 | xfs_bioerror_relse( |
1094 | struct xfs_buf *bp) | 1098 | struct xfs_buf *bp) |
1095 | { | 1099 | { |
@@ -1152,7 +1156,7 @@ xfs_bwrite( | |||
1152 | ASSERT(xfs_buf_islocked(bp)); | 1156 | ASSERT(xfs_buf_islocked(bp)); |
1153 | 1157 | ||
1154 | bp->b_flags |= XBF_WRITE; | 1158 | bp->b_flags |= XBF_WRITE; |
1155 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); | 1159 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); |
1156 | 1160 | ||
1157 | xfs_bdstrat_cb(bp); | 1161 | xfs_bdstrat_cb(bp); |
1158 | 1162 | ||
@@ -1164,25 +1168,6 @@ xfs_bwrite( | |||
1164 | return error; | 1168 | return error; |
1165 | } | 1169 | } |
1166 | 1170 | ||
1167 | /* | ||
1168 | * Wrapper around bdstrat so that we can stop data from going to disk in case | ||
1169 | * we are shutting down the filesystem. Typically user data goes thru this | ||
1170 | * path; one of the exceptions is the superblock. | ||
1171 | */ | ||
1172 | void | ||
1173 | xfsbdstrat( | ||
1174 | struct xfs_mount *mp, | ||
1175 | struct xfs_buf *bp) | ||
1176 | { | ||
1177 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
1178 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
1179 | xfs_bioerror_relse(bp); | ||
1180 | return; | ||
1181 | } | ||
1182 | |||
1183 | xfs_buf_iorequest(bp); | ||
1184 | } | ||
1185 | |||
1186 | STATIC void | 1171 | STATIC void |
1187 | _xfs_buf_ioend( | 1172 | _xfs_buf_ioend( |
1188 | xfs_buf_t *bp, | 1173 | xfs_buf_t *bp, |
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg( | |||
1516 | struct xfs_buf *bp; | 1501 | struct xfs_buf *bp; |
1517 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | 1502 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
1518 | list_del_init(&bp->b_lru); | 1503 | list_del_init(&bp->b_lru); |
1504 | if (bp->b_flags & XBF_WRITE_FAIL) { | ||
1505 | xfs_alert(btp->bt_mount, | ||
1506 | "Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" | ||
1507 | "Please run xfs_repair to determine the extent of the problem.", | ||
1508 | (long long)bp->b_bn); | ||
1509 | } | ||
1519 | xfs_buf_rele(bp); | 1510 | xfs_buf_rele(bp); |
1520 | } | 1511 | } |
1521 | if (loop++ != 0) | 1512 | if (loop++ != 0) |
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit( | |||
1799 | 1790 | ||
1800 | blk_start_plug(&plug); | 1791 | blk_start_plug(&plug); |
1801 | list_for_each_entry_safe(bp, n, io_list, b_list) { | 1792 | list_for_each_entry_safe(bp, n, io_list, b_list) { |
1802 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); | 1793 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); |
1803 | bp->b_flags |= XBF_WRITE; | 1794 | bp->b_flags |= XBF_WRITE; |
1804 | 1795 | ||
1805 | if (!wait) { | 1796 | if (!wait) { |
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e65683361017..1cf21a4a9f22 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -45,6 +45,7 @@ typedef enum { | |||
45 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 45 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
46 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 46 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
47 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ | 47 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
48 | #define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ | ||
48 | 49 | ||
49 | /* I/O hints for the BIO layer */ | 50 | /* I/O hints for the BIO layer */ |
50 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 51 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
70 | { XBF_ASYNC, "ASYNC" }, \ | 71 | { XBF_ASYNC, "ASYNC" }, \ |
71 | { XBF_DONE, "DONE" }, \ | 72 | { XBF_DONE, "DONE" }, \ |
72 | { XBF_STALE, "STALE" }, \ | 73 | { XBF_STALE, "STALE" }, \ |
74 | { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ | ||
73 | { XBF_SYNCIO, "SYNCIO" }, \ | 75 | { XBF_SYNCIO, "SYNCIO" }, \ |
74 | { XBF_FUA, "FUA" }, \ | 76 | { XBF_FUA, "FUA" }, \ |
75 | { XBF_FLUSH, "FLUSH" }, \ | 77 | { XBF_FLUSH, "FLUSH" }, \ |
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 82 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
81 | { _XBF_COMPOUND, "COMPOUND" } | 83 | { _XBF_COMPOUND, "COMPOUND" } |
82 | 84 | ||
85 | |||
83 | /* | 86 | /* |
84 | * Internal state flags. | 87 | * Internal state flags. |
85 | */ | 88 | */ |
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); | |||
269 | 272 | ||
270 | /* Buffer Read and Write Routines */ | 273 | /* Buffer Read and Write Routines */ |
271 | extern int xfs_bwrite(struct xfs_buf *bp); | 274 | extern int xfs_bwrite(struct xfs_buf *bp); |
272 | |||
273 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | ||
274 | |||
275 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 275 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
276 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 276 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
277 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | 277 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); |
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | |||
282 | #define xfs_buf_zero(bp, off, len) \ | 282 | #define xfs_buf_zero(bp, off, len) \ |
283 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | 283 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) |
284 | 284 | ||
285 | extern int xfs_bioerror_relse(struct xfs_buf *); | ||
286 | |||
285 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 287 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
286 | { | 288 | { |
287 | return bp ? bp->b_error : ENOMEM; | 289 | return bp ? bp->b_error : ENOMEM; |
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void); | |||
301 | 303 | ||
302 | #define XFS_BUF_ZEROFLAGS(bp) \ | 304 | #define XFS_BUF_ZEROFLAGS(bp) \ |
303 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ | 305 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ |
304 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 306 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \ |
307 | XBF_WRITE_FAIL)) | ||
305 | 308 | ||
306 | void xfs_buf_stale(struct xfs_buf *bp); | 309 | void xfs_buf_stale(struct xfs_buf *bp); |
307 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 310 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67ba25d3..2227b9b050bb 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -496,6 +496,14 @@ xfs_buf_item_unpin( | |||
496 | } | 496 | } |
497 | } | 497 | } |
498 | 498 | ||
499 | /* | ||
500 | * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 | ||
501 | * seconds so as to not spam logs too much on repeated detection of the same | ||
502 | * buffer being bad.. | ||
503 | */ | ||
504 | |||
505 | DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); | ||
506 | |||
499 | STATIC uint | 507 | STATIC uint |
500 | xfs_buf_item_push( | 508 | xfs_buf_item_push( |
501 | struct xfs_log_item *lip, | 509 | struct xfs_log_item *lip, |
@@ -524,6 +532,14 @@ xfs_buf_item_push( | |||
524 | 532 | ||
525 | trace_xfs_buf_item_push(bip); | 533 | trace_xfs_buf_item_push(bip); |
526 | 534 | ||
535 | /* has a previous flush failed due to IO errors? */ | ||
536 | if ((bp->b_flags & XBF_WRITE_FAIL) && | ||
537 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { | ||
538 | xfs_warn(bp->b_target->bt_mount, | ||
539 | "Detected failing async write on buffer block 0x%llx. Retrying async write.\n", | ||
540 | (long long)bp->b_bn); | ||
541 | } | ||
542 | |||
527 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | 543 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
528 | rval = XFS_ITEM_FLUSHING; | 544 | rval = XFS_ITEM_FLUSHING; |
529 | xfs_buf_unlock(bp); | 545 | xfs_buf_unlock(bp); |
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks( | |||
1096 | 1112 | ||
1097 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ | 1113 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
1098 | 1114 | ||
1099 | if (!XFS_BUF_ISSTALE(bp)) { | 1115 | if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { |
1100 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; | 1116 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | |
1117 | XBF_DONE | XBF_WRITE_FAIL; | ||
1101 | xfs_buf_iorequest(bp); | 1118 | xfs_buf_iorequest(bp); |
1102 | } else { | 1119 | } else { |
1103 | xfs_buf_relse(bp); | 1120 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56369d4509d5..48c7d18f68c3 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup( | |||
2067 | */ | 2067 | */ |
2068 | int /* error */ | 2068 | int /* error */ |
2069 | xfs_dir2_node_removename( | 2069 | xfs_dir2_node_removename( |
2070 | xfs_da_args_t *args) /* operation arguments */ | 2070 | struct xfs_da_args *args) /* operation arguments */ |
2071 | { | 2071 | { |
2072 | xfs_da_state_blk_t *blk; /* leaf block */ | 2072 | struct xfs_da_state_blk *blk; /* leaf block */ |
2073 | int error; /* error return value */ | 2073 | int error; /* error return value */ |
2074 | int rval; /* operation return value */ | 2074 | int rval; /* operation return value */ |
2075 | xfs_da_state_t *state; /* btree cursor */ | 2075 | struct xfs_da_state *state; /* btree cursor */ |
2076 | 2076 | ||
2077 | trace_xfs_dir2_node_removename(args); | 2077 | trace_xfs_dir2_node_removename(args); |
2078 | 2078 | ||
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename( | |||
2084 | state->mp = args->dp->i_mount; | 2084 | state->mp = args->dp->i_mount; |
2085 | state->blocksize = state->mp->m_dirblksize; | 2085 | state->blocksize = state->mp->m_dirblksize; |
2086 | state->node_ents = state->mp->m_dir_node_ents; | 2086 | state->node_ents = state->mp->m_dir_node_ents; |
2087 | /* | 2087 | |
2088 | * Look up the entry we're deleting, set up the cursor. | 2088 | /* Look up the entry we're deleting, set up the cursor. */ |
2089 | */ | ||
2090 | error = xfs_da3_node_lookup_int(state, &rval); | 2089 | error = xfs_da3_node_lookup_int(state, &rval); |
2091 | if (error) | 2090 | if (error) |
2092 | rval = error; | 2091 | goto out_free; |
2093 | /* | 2092 | |
2094 | * Didn't find it, upper layer screwed up. | 2093 | /* Didn't find it, upper layer screwed up. */ |
2095 | */ | ||
2096 | if (rval != EEXIST) { | 2094 | if (rval != EEXIST) { |
2097 | xfs_da_state_free(state); | 2095 | error = rval; |
2098 | return rval; | 2096 | goto out_free; |
2099 | } | 2097 | } |
2098 | |||
2100 | blk = &state->path.blk[state->path.active - 1]; | 2099 | blk = &state->path.blk[state->path.active - 1]; |
2101 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | 2100 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); |
2102 | ASSERT(state->extravalid); | 2101 | ASSERT(state->extravalid); |
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename( | |||
2107 | error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, | 2106 | error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, |
2108 | &state->extrablk, &rval); | 2107 | &state->extrablk, &rval); |
2109 | if (error) | 2108 | if (error) |
2110 | return error; | 2109 | goto out_free; |
2111 | /* | 2110 | /* |
2112 | * Fix the hash values up the btree. | 2111 | * Fix the hash values up the btree. |
2113 | */ | 2112 | */ |
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename( | |||
2122 | */ | 2121 | */ |
2123 | if (!error) | 2122 | if (!error) |
2124 | error = xfs_dir2_node_to_leaf(state); | 2123 | error = xfs_dir2_node_to_leaf(state); |
2124 | out_free: | ||
2125 | xfs_da_state_free(state); | 2125 | xfs_da_state_free(state); |
2126 | return error; | 2126 | return error; |
2127 | } | 2127 | } |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e544e963..104455b8046c 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -618,7 +618,8 @@ xfs_setattr_nonsize( | |||
618 | } | 618 | } |
619 | if (!gid_eq(igid, gid)) { | 619 | if (!gid_eq(igid, gid)) { |
620 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { | 620 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { |
621 | ASSERT(!XFS_IS_PQUOTA_ON(mp)); | 621 | ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || |
622 | !XFS_IS_PQUOTA_ON(mp)); | ||
622 | ASSERT(mask & ATTR_GID); | 623 | ASSERT(mask & ATTR_GID); |
623 | ASSERT(gdqp); | 624 | ASSERT(gdqp); |
624 | olddquot2 = xfs_qm_vop_chown(tp, ip, | 625 | olddquot2 = xfs_qm_vop_chown(tp, ip, |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669df40f3..eae16920655b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -193,7 +193,10 @@ xlog_bread_noalign( | |||
193 | bp->b_io_length = nbblks; | 193 | bp->b_io_length = nbblks; |
194 | bp->b_error = 0; | 194 | bp->b_error = 0; |
195 | 195 | ||
196 | xfsbdstrat(log->l_mp, bp); | 196 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) |
197 | return XFS_ERROR(EIO); | ||
198 | |||
199 | xfs_buf_iorequest(bp); | ||
197 | error = xfs_buf_iowait(bp); | 200 | error = xfs_buf_iowait(bp); |
198 | if (error) | 201 | if (error) |
199 | xfs_buf_ioerror_alert(bp, __func__); | 202 | xfs_buf_ioerror_alert(bp, __func__); |
@@ -4397,7 +4400,13 @@ xlog_do_recover( | |||
4397 | XFS_BUF_READ(bp); | 4400 | XFS_BUF_READ(bp); |
4398 | XFS_BUF_UNASYNC(bp); | 4401 | XFS_BUF_UNASYNC(bp); |
4399 | bp->b_ops = &xfs_sb_buf_ops; | 4402 | bp->b_ops = &xfs_sb_buf_ops; |
4400 | xfsbdstrat(log->l_mp, bp); | 4403 | |
4404 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { | ||
4405 | xfs_buf_relse(bp); | ||
4406 | return XFS_ERROR(EIO); | ||
4407 | } | ||
4408 | |||
4409 | xfs_buf_iorequest(bp); | ||
4401 | error = xfs_buf_iowait(bp); | 4410 | error = xfs_buf_iowait(bp); |
4402 | if (error) { | 4411 | if (error) { |
4403 | xfs_buf_ioerror_alert(bp, __func__); | 4412 | xfs_buf_ioerror_alert(bp, __func__); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996cfec6..dd88f0e27bd8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -134,8 +134,6 @@ xfs_qm_dqpurge( | |||
134 | { | 134 | { |
135 | struct xfs_mount *mp = dqp->q_mount; | 135 | struct xfs_mount *mp = dqp->q_mount; |
136 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 136 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
137 | struct xfs_dquot *gdqp = NULL; | ||
138 | struct xfs_dquot *pdqp = NULL; | ||
139 | 137 | ||
140 | xfs_dqlock(dqp); | 138 | xfs_dqlock(dqp); |
141 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { | 139 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { |
@@ -143,21 +141,6 @@ xfs_qm_dqpurge( | |||
143 | return EAGAIN; | 141 | return EAGAIN; |
144 | } | 142 | } |
145 | 143 | ||
146 | /* | ||
147 | * If this quota has a hint attached, prepare for releasing it now. | ||
148 | */ | ||
149 | gdqp = dqp->q_gdquot; | ||
150 | if (gdqp) { | ||
151 | xfs_dqlock(gdqp); | ||
152 | dqp->q_gdquot = NULL; | ||
153 | } | ||
154 | |||
155 | pdqp = dqp->q_pdquot; | ||
156 | if (pdqp) { | ||
157 | xfs_dqlock(pdqp); | ||
158 | dqp->q_pdquot = NULL; | ||
159 | } | ||
160 | |||
161 | dqp->dq_flags |= XFS_DQ_FREEING; | 144 | dqp->dq_flags |= XFS_DQ_FREEING; |
162 | 145 | ||
163 | xfs_dqflock(dqp); | 146 | xfs_dqflock(dqp); |
@@ -206,11 +189,47 @@ xfs_qm_dqpurge( | |||
206 | XFS_STATS_DEC(xs_qm_dquot_unused); | 189 | XFS_STATS_DEC(xs_qm_dquot_unused); |
207 | 190 | ||
208 | xfs_qm_dqdestroy(dqp); | 191 | xfs_qm_dqdestroy(dqp); |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Release the group or project dquot pointers the user dquots maybe carrying | ||
197 | * around as a hint, and proceed to purge the user dquot cache if requested. | ||
198 | */ | ||
199 | STATIC int | ||
200 | xfs_qm_dqpurge_hints( | ||
201 | struct xfs_dquot *dqp, | ||
202 | void *data) | ||
203 | { | ||
204 | struct xfs_dquot *gdqp = NULL; | ||
205 | struct xfs_dquot *pdqp = NULL; | ||
206 | uint flags = *((uint *)data); | ||
207 | |||
208 | xfs_dqlock(dqp); | ||
209 | if (dqp->dq_flags & XFS_DQ_FREEING) { | ||
210 | xfs_dqunlock(dqp); | ||
211 | return EAGAIN; | ||
212 | } | ||
213 | |||
214 | /* If this quota has a hint attached, prepare for releasing it now */ | ||
215 | gdqp = dqp->q_gdquot; | ||
216 | if (gdqp) | ||
217 | dqp->q_gdquot = NULL; | ||
218 | |||
219 | pdqp = dqp->q_pdquot; | ||
220 | if (pdqp) | ||
221 | dqp->q_pdquot = NULL; | ||
222 | |||
223 | xfs_dqunlock(dqp); | ||
209 | 224 | ||
210 | if (gdqp) | 225 | if (gdqp) |
211 | xfs_qm_dqput(gdqp); | 226 | xfs_qm_dqrele(gdqp); |
212 | if (pdqp) | 227 | if (pdqp) |
213 | xfs_qm_dqput(pdqp); | 228 | xfs_qm_dqrele(pdqp); |
229 | |||
230 | if (flags & XFS_QMOPT_UQUOTA) | ||
231 | return xfs_qm_dqpurge(dqp, NULL); | ||
232 | |||
214 | return 0; | 233 | return 0; |
215 | } | 234 | } |
216 | 235 | ||
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all( | |||
222 | struct xfs_mount *mp, | 241 | struct xfs_mount *mp, |
223 | uint flags) | 242 | uint flags) |
224 | { | 243 | { |
225 | if (flags & XFS_QMOPT_UQUOTA) | 244 | /* |
226 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); | 245 | * We have to release group/project dquot hint(s) from the user dquot |
246 | * at first if they are there, otherwise we would run into an infinite | ||
247 | * loop while walking through radix tree to purge other type of dquots | ||
248 | * since their refcount is not zero if the user dquot refers to them | ||
249 | * as hint. | ||
250 | * | ||
251 | * Call the special xfs_qm_dqpurge_hints() will end up go through the | ||
252 | * general xfs_qm_dqpurge() against user dquot cache if requested. | ||
253 | */ | ||
254 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags); | ||
255 | |||
227 | if (flags & XFS_QMOPT_GQUOTA) | 256 | if (flags & XFS_QMOPT_GQUOTA) |
228 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); | 257 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
229 | if (flags & XFS_QMOPT_PQUOTA) | 258 | if (flags & XFS_QMOPT_PQUOTA) |
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach( | |||
2082 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 2111 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2083 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 2112 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
2084 | 2113 | ||
2085 | if (udqp) { | 2114 | if (udqp && XFS_IS_UQUOTA_ON(mp)) { |
2086 | ASSERT(ip->i_udquot == NULL); | 2115 | ASSERT(ip->i_udquot == NULL); |
2087 | ASSERT(XFS_IS_UQUOTA_ON(mp)); | ||
2088 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); | 2116 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); |
2089 | 2117 | ||
2090 | ip->i_udquot = xfs_qm_dqhold(udqp); | 2118 | ip->i_udquot = xfs_qm_dqhold(udqp); |
2091 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); | 2119 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); |
2092 | } | 2120 | } |
2093 | if (gdqp) { | 2121 | if (gdqp && XFS_IS_GQUOTA_ON(mp)) { |
2094 | ASSERT(ip->i_gdquot == NULL); | 2122 | ASSERT(ip->i_gdquot == NULL); |
2095 | ASSERT(XFS_IS_GQUOTA_ON(mp)); | ||
2096 | ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); | 2123 | ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); |
2097 | ip->i_gdquot = xfs_qm_dqhold(gdqp); | 2124 | ip->i_gdquot = xfs_qm_dqhold(gdqp); |
2098 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 2125 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
2099 | } | 2126 | } |
2100 | if (pdqp) { | 2127 | if (pdqp && XFS_IS_PQUOTA_ON(mp)) { |
2101 | ASSERT(ip->i_pdquot == NULL); | 2128 | ASSERT(ip->i_pdquot == NULL); |
2102 | ASSERT(XFS_IS_PQUOTA_ON(mp)); | ||
2103 | ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); | 2129 | ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); |
2104 | 2130 | ||
2105 | ip->i_pdquot = xfs_qm_dqhold(pdqp); | 2131 | ip->i_pdquot = xfs_qm_dqhold(pdqp); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c035d11b7734..647b6f1d8923 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map( | |||
314 | ASSERT(bp->b_iodone == NULL); | 314 | ASSERT(bp->b_iodone == NULL); |
315 | XFS_BUF_READ(bp); | 315 | XFS_BUF_READ(bp); |
316 | bp->b_ops = ops; | 316 | bp->b_ops = ops; |
317 | xfsbdstrat(tp->t_mountp, bp); | 317 | |
318 | /* | ||
319 | * XXX(hch): clean up the error handling here to be less | ||
320 | * of a mess.. | ||
321 | */ | ||
322 | if (XFS_FORCED_SHUTDOWN(mp)) { | ||
323 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | ||
324 | xfs_bioerror_relse(bp); | ||
325 | } else { | ||
326 | xfs_buf_iorequest(bp); | ||
327 | } | ||
328 | |||
318 | error = xfs_buf_iowait(bp); | 329 | error = xfs_buf_iowait(bp); |
319 | if (error) { | 330 | if (error) { |
320 | xfs_buf_ioerror_alert(bp, __func__); | 331 | xfs_buf_ioerror_alert(bp, __func__); |