aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c78
-rw-r--r--fs/afs/fsclient.c221
-rw-r--r--fs/afs/internal.h14
-rw-r--r--fs/afs/rxrpc.c73
-rw-r--r--fs/afs/vlclient.c11
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/btrfs/backref.c1
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delayed-ref.c34
-rw-r--r--fs/btrfs/delayed-ref.h3
-rw-r--r--fs/btrfs/disk-io.c56
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c185
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/file.c36
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c83
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/qgroup.c62
-rw-r--r--fs/btrfs/qgroup.h36
-rw-r--r--fs/btrfs/relocation.c126
-rw-r--r--fs/btrfs/root-tree.c27
-rw-r--r--fs/btrfs/send.c173
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/btrfs/tree-log.c106
-rw-r--r--fs/btrfs/tree-log.h5
-rw-r--r--fs/btrfs/volumes.c27
-rw-r--r--fs/ceph/caps.c5
-rw-r--r--fs/ceph/mds_client.c1
-rw-r--r--fs/dlm/debug_fs.c62
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/f2fs/f2fs.h12
-rw-r--r--fs/f2fs/file.c13
-rw-r--r--fs/f2fs/node.c47
-rw-r--r--fs/f2fs/super.c6
-rw-r--r--fs/fs-writeback.c6
-rw-r--r--fs/iomap.c21
-rw-r--r--fs/nfs/nfs42proc.c2
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4proc.c9
-rw-r--r--fs/nfs/nfs4renewd.c20
-rw-r--r--fs/nfs/nfs4state.c9
-rw-r--r--fs/nfsd/nfs4state.c65
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/seq_file.c4
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/xattr.c5
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c14
-rw-r--r--fs/xfs/libxfs/xfs_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c6
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_fsops.c1
-rw-r--r--fs/xfs/xfs_iomap.c69
-rw-r--r--fs/xfs/xfs_iomap.h1
-rw-r--r--fs/xfs/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_trace.h1
59 files changed, 1240 insertions, 590 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6da674..85737e96ab8b 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
189 case 1: 189 case 1:
190 _debug("extract FID count"); 190 _debug("extract FID count");
191 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 191 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
192 switch (ret) { 192 if (ret < 0)
193 case 0: break; 193 return ret;
194 case -EAGAIN: return 0;
195 default: return ret;
196 }
197 194
198 call->count = ntohl(call->tmp); 195 call->count = ntohl(call->tmp);
199 _debug("FID count: %u", call->count); 196 _debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
210 _debug("extract FID array"); 207 _debug("extract FID array");
211 ret = afs_extract_data(call, skb, last, call->buffer, 208 ret = afs_extract_data(call, skb, last, call->buffer,
212 call->count * 3 * 4); 209 call->count * 3 * 4);
213 switch (ret) { 210 if (ret < 0)
214 case 0: break; 211 return ret;
215 case -EAGAIN: return 0;
216 default: return ret;
217 }
218 212
219 _debug("unmarshall FID array"); 213 _debug("unmarshall FID array");
220 call->request = kcalloc(call->count, 214 call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
239 case 3: 233 case 3:
240 _debug("extract CB count"); 234 _debug("extract CB count");
241 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 235 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
242 switch (ret) { 236 if (ret < 0)
243 case 0: break; 237 return ret;
244 case -EAGAIN: return 0;
245 default: return ret;
246 }
247 238
248 tmp = ntohl(call->tmp); 239 tmp = ntohl(call->tmp);
249 _debug("CB count: %u", tmp); 240 _debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
258 _debug("extract CB array"); 249 _debug("extract CB array");
259 ret = afs_extract_data(call, skb, last, call->request, 250 ret = afs_extract_data(call, skb, last, call->request,
260 call->count * 3 * 4); 251 call->count * 3 * 4);
261 switch (ret) { 252 if (ret < 0)
262 case 0: break; 253 return ret;
263 case -EAGAIN: return 0;
264 default: return ret;
265 }
266 254
267 _debug("unmarshall CB array"); 255 _debug("unmarshall CB array");
268 cb = call->request; 256 cb = call->request;
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
278 call->unmarshall++; 266 call->unmarshall++;
279 267
280 case 5: 268 case 5:
281 _debug("trailer"); 269 ret = afs_data_complete(call, skb, last);
282 if (skb->len != 0) 270 if (ret < 0)
283 return -EBADMSG; 271 return ret;
284 272
285 /* Record that the message was unmarshalled successfully so 273 /* Record that the message was unmarshalled successfully so
286 * that the call destructor can know do the callback breaking 274 * that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
294 break; 282 break;
295 } 283 }
296 284
297 if (!last)
298 return 0;
299 285
300 call->state = AFS_CALL_REPLYING; 286 call->state = AFS_CALL_REPLYING;
301 287
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
335{ 321{
336 struct afs_server *server; 322 struct afs_server *server;
337 struct in_addr addr; 323 struct in_addr addr;
324 int ret;
338 325
339 _enter(",{%u},%d", skb->len, last); 326 _enter(",{%u},%d", skb->len, last);
340 327
341 if (skb->len > 0) 328 ret = afs_data_complete(call, skb, last);
342 return -EBADMSG; 329 if (ret < 0)
343 if (!last) 330 return ret;
344 return 0;
345 331
346 /* no unmarshalling required */ 332 /* no unmarshalling required */
347 call->state = AFS_CALL_REPLYING; 333 call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
371 357
372 _enter(",{%u},%d", skb->len, last); 358 _enter(",{%u},%d", skb->len, last);
373 359
360 /* There are some arguments that we ignore */
361 afs_data_consumed(call, skb);
374 if (!last) 362 if (!last)
375 return 0; 363 return -EAGAIN;
376 364
377 /* no unmarshalling required */ 365 /* no unmarshalling required */
378 call->state = AFS_CALL_REPLYING; 366 call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
408static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, 396static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
409 bool last) 397 bool last)
410{ 398{
399 int ret;
400
411 _enter(",{%u},%d", skb->len, last); 401 _enter(",{%u},%d", skb->len, last);
412 402
413 if (skb->len > 0) 403 ret = afs_data_complete(call, skb, last);
414 return -EBADMSG; 404 if (ret < 0)
415 if (!last) 405 return ret;
416 return 0;
417 406
418 /* no unmarshalling required */ 407 /* no unmarshalling required */
419 call->state = AFS_CALL_REPLYING; 408 call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
460 449
461 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 450 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
462 451
463 if (skb->len > 0) 452 ret = afs_data_complete(call, skb, last);
464 return -EBADMSG; 453 if (ret < 0)
465 if (!last) 454 return ret;
466 return 0;
467 455
468 switch (call->unmarshall) { 456 switch (call->unmarshall) {
469 case 0: 457 case 0:
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
509 break; 497 break;
510 } 498 }
511 499
512 if (!last) 500 ret = afs_data_complete(call, skb, last);
513 return 0; 501 if (ret < 0)
502 return ret;
514 503
515 call->state = AFS_CALL_REPLYING; 504 call->state = AFS_CALL_REPLYING;
516 505
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
588static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, 577static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
589 struct sk_buff *skb, bool last) 578 struct sk_buff *skb, bool last)
590{ 579{
580 int ret;
581
591 _enter(",{%u},%d", skb->len, last); 582 _enter(",{%u},%d", skb->len, last);
592 583
593 if (skb->len > 0) 584 ret = afs_data_complete(call, skb, last);
594 return -EBADMSG; 585 if (ret < 0)
595 if (!last) 586 return ret;
596 return 0;
597 587
598 /* no unmarshalling required */ 588 /* no unmarshalling required */
599 call->state = AFS_CALL_REPLYING; 589 call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930ec2888..9312b92e54be 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
240{ 240{
241 struct afs_vnode *vnode = call->reply; 241 struct afs_vnode *vnode = call->reply;
242 const __be32 *bp; 242 const __be32 *bp;
243 int ret;
243 244
244 _enter(",,%u", last); 245 _enter(",,%u", last);
245 246
246 afs_transfer_reply(call, skb); 247 ret = afs_transfer_reply(call, skb, last);
247 if (!last) 248 if (ret < 0)
248 return 0; 249 return ret;
249
250 if (call->reply_size != call->reply_max)
251 return -EBADMSG;
252 250
253 /* unmarshall the reply once we've received all of it */ 251 /* unmarshall the reply once we've received all of it */
254 bp = call->buffer; 252 bp = call->buffer;
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
335 case 1: 333 case 1:
336 _debug("extract data length (MSW)"); 334 _debug("extract data length (MSW)");
337 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 335 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
338 switch (ret) { 336 if (ret < 0)
339 case 0: break; 337 return ret;
340 case -EAGAIN: return 0;
341 default: return ret;
342 }
343 338
344 call->count = ntohl(call->tmp); 339 call->count = ntohl(call->tmp);
345 _debug("DATA length MSW: %u", call->count); 340 _debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
353 case 2: 348 case 2:
354 _debug("extract data length"); 349 _debug("extract data length");
355 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 350 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
356 switch (ret) { 351 if (ret < 0)
357 case 0: break; 352 return ret;
358 case -EAGAIN: return 0;
359 default: return ret;
360 }
361 353
362 call->count = ntohl(call->tmp); 354 call->count = ntohl(call->tmp);
363 _debug("DATA length: %u", call->count); 355 _debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
375 ret = afs_extract_data(call, skb, last, buffer, 367 ret = afs_extract_data(call, skb, last, buffer,
376 call->count); 368 call->count);
377 kunmap_atomic(buffer); 369 kunmap_atomic(buffer);
378 switch (ret) { 370 if (ret < 0)
379 case 0: break; 371 return ret;
380 case -EAGAIN: return 0;
381 default: return ret;
382 }
383 } 372 }
384 373
385 call->offset = 0; 374 call->offset = 0;
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
389 case 4: 378 case 4:
390 ret = afs_extract_data(call, skb, last, call->buffer, 379 ret = afs_extract_data(call, skb, last, call->buffer,
391 (21 + 3 + 6) * 4); 380 (21 + 3 + 6) * 4);
392 switch (ret) { 381 if (ret < 0)
393 case 0: break; 382 return ret;
394 case -EAGAIN: return 0;
395 default: return ret;
396 }
397 383
398 bp = call->buffer; 384 bp = call->buffer;
399 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); 385 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
405 call->unmarshall++; 391 call->unmarshall++;
406 392
407 case 5: 393 case 5:
408 _debug("trailer"); 394 ret = afs_data_complete(call, skb, last);
409 if (skb->len != 0) 395 if (ret < 0)
410 return -EBADMSG; 396 return ret;
411 break; 397 break;
412 } 398 }
413 399
414 if (!last)
415 return 0;
416
417 if (call->count < PAGE_SIZE) { 400 if (call->count < PAGE_SIZE) {
418 _debug("clear"); 401 _debug("clear");
419 page = call->reply3; 402 page = call->reply3;
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
537{ 520{
538 _enter(",{%u},%d", skb->len, last); 521 _enter(",{%u},%d", skb->len, last);
539 522
540 if (skb->len > 0) 523 /* shouldn't be any reply data */
541 return -EBADMSG; /* shouldn't be any reply data */ 524 return afs_data_complete(call, skb, last);
542 return 0;
543} 525}
544 526
545/* 527/*
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
622{ 604{
623 struct afs_vnode *vnode = call->reply; 605 struct afs_vnode *vnode = call->reply;
624 const __be32 *bp; 606 const __be32 *bp;
607 int ret;
625 608
626 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 609 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
627 610
628 afs_transfer_reply(call, skb); 611 ret = afs_transfer_reply(call, skb, last);
629 if (!last) 612 if (ret < 0)
630 return 0; 613 return ret;
631
632 if (call->reply_size != call->reply_max)
633 return -EBADMSG;
634 614
635 /* unmarshall the reply once we've received all of it */ 615 /* unmarshall the reply once we've received all of it */
636 bp = call->buffer; 616 bp = call->buffer;
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call,
721{ 701{
722 struct afs_vnode *vnode = call->reply; 702 struct afs_vnode *vnode = call->reply;
723 const __be32 *bp; 703 const __be32 *bp;
704 int ret;
724 705
725 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 706 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
726 707
727 afs_transfer_reply(call, skb); 708 ret = afs_transfer_reply(call, skb, last);
728 if (!last) 709 if (ret < 0)
729 return 0; 710 return ret;
730
731 if (call->reply_size != call->reply_max)
732 return -EBADMSG;
733 711
734 /* unmarshall the reply once we've received all of it */ 712 /* unmarshall the reply once we've received all of it */
735 bp = call->buffer; 713 bp = call->buffer;
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call,
804{ 782{
805 struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; 783 struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
806 const __be32 *bp; 784 const __be32 *bp;
785 int ret;
807 786
808 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 787 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
809 788
810 afs_transfer_reply(call, skb); 789 ret = afs_transfer_reply(call, skb, last);
811 if (!last) 790 if (ret < 0)
812 return 0; 791 return ret;
813
814 if (call->reply_size != call->reply_max)
815 return -EBADMSG;
816 792
817 /* unmarshall the reply once we've received all of it */ 793 /* unmarshall the reply once we've received all of it */
818 bp = call->buffer; 794 bp = call->buffer;
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
892{ 868{
893 struct afs_vnode *vnode = call->reply; 869 struct afs_vnode *vnode = call->reply;
894 const __be32 *bp; 870 const __be32 *bp;
871 int ret;
895 872
896 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 873 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
897 874
898 afs_transfer_reply(call, skb); 875 ret = afs_transfer_reply(call, skb, last);
899 if (!last) 876 if (ret < 0)
900 return 0; 877 return ret;
901
902 if (call->reply_size != call->reply_max)
903 return -EBADMSG;
904 878
905 /* unmarshall the reply once we've received all of it */ 879 /* unmarshall the reply once we've received all of it */
906 bp = call->buffer; 880 bp = call->buffer;
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call,
999{ 973{
1000 struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; 974 struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
1001 const __be32 *bp; 975 const __be32 *bp;
976 int ret;
1002 977
1003 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 978 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
1004 979
1005 afs_transfer_reply(call, skb); 980 ret = afs_transfer_reply(call, skb, last);
1006 if (!last) 981 if (ret < 0)
1007 return 0; 982 return ret;
1008
1009 if (call->reply_size != call->reply_max)
1010 return -EBADMSG;
1011 983
1012 /* unmarshall the reply once we've received all of it */ 984 /* unmarshall the reply once we've received all of it */
1013 bp = call->buffer; 985 bp = call->buffer;
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
1105{ 1077{
1106 struct afs_vnode *vnode = call->reply; 1078 struct afs_vnode *vnode = call->reply;
1107 const __be32 *bp; 1079 const __be32 *bp;
1080 int ret;
1108 1081
1109 _enter(",,%u", last); 1082 _enter(",,%u", last);
1110 1083
1111 afs_transfer_reply(call, skb); 1084 ret = afs_transfer_reply(call, skb, last);
1112 if (!last) { 1085 if (ret < 0)
1113 _leave(" = 0 [more]"); 1086 return ret;
1114 return 0;
1115 }
1116
1117 if (call->reply_size != call->reply_max) {
1118 _leave(" = -EBADMSG [%u != %u]",
1119 call->reply_size, call->reply_max);
1120 return -EBADMSG;
1121 }
1122 1087
1123 /* unmarshall the reply once we've received all of it */ 1088 /* unmarshall the reply once we've received all of it */
1124 bp = call->buffer; 1089 bp = call->buffer;
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
1292 afs_dataversion_t *store_version; 1257 afs_dataversion_t *store_version;
1293 struct afs_vnode *vnode = call->reply; 1258 struct afs_vnode *vnode = call->reply;
1294 const __be32 *bp; 1259 const __be32 *bp;
1260 int ret;
1295 1261
1296 _enter(",,%u", last); 1262 _enter(",,%u", last);
1297 1263
1298 afs_transfer_reply(call, skb); 1264 ret = afs_transfer_reply(call, skb, last);
1299 if (!last) { 1265 if (ret < 0)
1300 _leave(" = 0 [more]"); 1266 return ret;
1301 return 0;
1302 }
1303
1304 if (call->reply_size != call->reply_max) {
1305 _leave(" = -EBADMSG [%u != %u]",
1306 call->reply_size, call->reply_max);
1307 return -EBADMSG;
1308 }
1309 1267
1310 /* unmarshall the reply once we've received all of it */ 1268 /* unmarshall the reply once we've received all of it */
1311 store_version = NULL; 1269 store_version = NULL;
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1504 _debug("extract status"); 1462 _debug("extract status");
1505 ret = afs_extract_data(call, skb, last, call->buffer, 1463 ret = afs_extract_data(call, skb, last, call->buffer,
1506 12 * 4); 1464 12 * 4);
1507 switch (ret) { 1465 if (ret < 0)
1508 case 0: break; 1466 return ret;
1509 case -EAGAIN: return 0;
1510 default: return ret;
1511 }
1512 1467
1513 bp = call->buffer; 1468 bp = call->buffer;
1514 xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); 1469 xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1518 /* extract the volume name length */ 1473 /* extract the volume name length */
1519 case 2: 1474 case 2:
1520 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 1475 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
1521 switch (ret) { 1476 if (ret < 0)
1522 case 0: break; 1477 return ret;
1523 case -EAGAIN: return 0;
1524 default: return ret;
1525 }
1526 1478
1527 call->count = ntohl(call->tmp); 1479 call->count = ntohl(call->tmp);
1528 _debug("volname length: %u", call->count); 1480 _debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1537 if (call->count > 0) { 1489 if (call->count > 0) {
1538 ret = afs_extract_data(call, skb, last, call->reply3, 1490 ret = afs_extract_data(call, skb, last, call->reply3,
1539 call->count); 1491 call->count);
1540 switch (ret) { 1492 if (ret < 0)
1541 case 0: break; 1493 return ret;
1542 case -EAGAIN: return 0;
1543 default: return ret;
1544 }
1545 } 1494 }
1546 1495
1547 p = call->reply3; 1496 p = call->reply3;
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1561 case 4: 1510 case 4:
1562 ret = afs_extract_data(call, skb, last, call->buffer, 1511 ret = afs_extract_data(call, skb, last, call->buffer,
1563 call->count); 1512 call->count);
1564 switch (ret) { 1513 if (ret < 0)
1565 case 0: break; 1514 return ret;
1566 case -EAGAIN: return 0;
1567 default: return ret;
1568 }
1569 1515
1570 call->offset = 0; 1516 call->offset = 0;
1571 call->unmarshall++; 1517 call->unmarshall++;
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1574 /* extract the offline message length */ 1520 /* extract the offline message length */
1575 case 5: 1521 case 5:
1576 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 1522 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
1577 switch (ret) { 1523 if (ret < 0)
1578 case 0: break; 1524 return ret;
1579 case -EAGAIN: return 0;
1580 default: return ret;
1581 }
1582 1525
1583 call->count = ntohl(call->tmp); 1526 call->count = ntohl(call->tmp);
1584 _debug("offline msg length: %u", call->count); 1527 _debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1593 if (call->count > 0) { 1536 if (call->count > 0) {
1594 ret = afs_extract_data(call, skb, last, call->reply3, 1537 ret = afs_extract_data(call, skb, last, call->reply3,
1595 call->count); 1538 call->count);
1596 switch (ret) { 1539 if (ret < 0)
1597 case 0: break; 1540 return ret;
1598 case -EAGAIN: return 0;
1599 default: return ret;
1600 }
1601 } 1541 }
1602 1542
1603 p = call->reply3; 1543 p = call->reply3;
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1617 case 7: 1557 case 7:
1618 ret = afs_extract_data(call, skb, last, call->buffer, 1558 ret = afs_extract_data(call, skb, last, call->buffer,
1619 call->count); 1559 call->count);
1620 switch (ret) { 1560 if (ret < 0)
1621 case 0: break; 1561 return ret;
1622 case -EAGAIN: return 0;
1623 default: return ret;
1624 }
1625 1562
1626 call->offset = 0; 1563 call->offset = 0;
1627 call->unmarshall++; 1564 call->unmarshall++;
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1630 /* extract the message of the day length */ 1567 /* extract the message of the day length */
1631 case 8: 1568 case 8:
1632 ret = afs_extract_data(call, skb, last, &call->tmp, 4); 1569 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
1633 switch (ret) { 1570 if (ret < 0)
1634 case 0: break; 1571 return ret;
1635 case -EAGAIN: return 0;
1636 default: return ret;
1637 }
1638 1572
1639 call->count = ntohl(call->tmp); 1573 call->count = ntohl(call->tmp);
1640 _debug("motd length: %u", call->count); 1574 _debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1649 if (call->count > 0) { 1583 if (call->count > 0) {
1650 ret = afs_extract_data(call, skb, last, call->reply3, 1584 ret = afs_extract_data(call, skb, last, call->reply3,
1651 call->count); 1585 call->count);
1652 switch (ret) { 1586 if (ret < 0)
1653 case 0: break; 1587 return ret;
1654 case -EAGAIN: return 0;
1655 default: return ret;
1656 }
1657 } 1588 }
1658 1589
1659 p = call->reply3; 1590 p = call->reply3;
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
1673 case 10: 1604 case 10:
1674 ret = afs_extract_data(call, skb, last, call->buffer, 1605 ret = afs_extract_data(call, skb, last, call->buffer,
1675 call->count); 1606 call->count);
1676 switch (ret) { 1607 if (ret < 0)
1677 case 0: break; 1608 return ret;
1678 case -EAGAIN: return 0;
1679 default: return ret;
1680 }
1681 1609
1682 call->offset = 0; 1610 call->offset = 0;
1683 call->unmarshall++; 1611 call->unmarshall++;
1684 no_motd_padding: 1612 no_motd_padding:
1685 1613
1686 case 11: 1614 case 11:
1687 _debug("trailer %d", skb->len); 1615 ret = afs_data_complete(call, skb, last);
1688 if (skb->len != 0) 1616 if (ret < 0)
1689 return -EBADMSG; 1617 return ret;
1690 break; 1618 break;
1691 } 1619 }
1692 1620
1693 if (!last)
1694 return 0;
1695
1696 _leave(" = 0 [done]"); 1621 _leave(" = 0 [done]");
1697 return 0; 1622 return 0;
1698} 1623}
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
1764 struct sk_buff *skb, bool last) 1689 struct sk_buff *skb, bool last)
1765{ 1690{
1766 const __be32 *bp; 1691 const __be32 *bp;
1692 int ret;
1767 1693
1768 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); 1694 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
1769 1695
1770 afs_transfer_reply(call, skb); 1696 ret = afs_transfer_reply(call, skb, last);
1771 if (!last) 1697 if (ret < 0)
1772 return 0; 1698 return ret;
1773
1774 if (call->reply_size != call->reply_max)
1775 return -EBADMSG;
1776 1699
1777 /* unmarshall the reply once we've received all of it */ 1700 /* unmarshall the reply once we've received all of it */
1778 bp = call->buffer; 1701 bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982312f3..df976b2a7f40 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *);
609 */ 609 */
610extern int afs_open_socket(void); 610extern int afs_open_socket(void);
611extern void afs_close_socket(void); 611extern void afs_close_socket(void);
612extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
612extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, 613extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
613 const struct afs_wait_mode *); 614 const struct afs_wait_mode *);
614extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, 615extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
615 size_t, size_t); 616 size_t, size_t);
616extern void afs_flat_call_destructor(struct afs_call *); 617extern void afs_flat_call_destructor(struct afs_call *);
617extern void afs_transfer_reply(struct afs_call *, struct sk_buff *); 618extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
618extern void afs_send_empty_reply(struct afs_call *); 619extern void afs_send_empty_reply(struct afs_call *);
619extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); 620extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
620extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, 621extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
621 size_t); 622 size_t);
622 623
624static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
625 bool last)
626{
627 if (skb->len > 0)
628 return -EBADMSG;
629 afs_data_consumed(call, skb);
630 if (!last)
631 return -EAGAIN;
632 return 0;
633}
634
623/* 635/*
624 * security.c 636 * security.c
625 */ 637 */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 4832de84d52c..14d04c848465 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -150,10 +150,9 @@ void afs_close_socket(void)
150} 150}
151 151
152/* 152/*
153 * note that the data in a socket buffer is now delivered and that the buffer 153 * Note that the data in a socket buffer is now consumed.
154 * should be freed
155 */ 154 */
156static void afs_data_delivered(struct sk_buff *skb) 155void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
157{ 156{
158 if (!skb) { 157 if (!skb) {
159 _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); 158 _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb)
161 } else { 160 } else {
162 _debug("DLVR %p{%u} [%d]", 161 _debug("DLVR %p{%u} [%d]",
163 skb, skb->mark, atomic_read(&afs_outstanding_skbs)); 162 skb, skb->mark, atomic_read(&afs_outstanding_skbs));
164 if (atomic_dec_return(&afs_outstanding_skbs) == -1) 163 rxrpc_kernel_data_consumed(call->rxcall, skb);
165 BUG();
166 rxrpc_kernel_data_delivered(skb);
167 } 164 }
168} 165}
169 166
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call)
489 last = rxrpc_kernel_is_data_last(skb); 486 last = rxrpc_kernel_is_data_last(skb);
490 ret = call->type->deliver(call, skb, last); 487 ret = call->type->deliver(call, skb, last);
491 switch (ret) { 488 switch (ret) {
489 case -EAGAIN:
490 if (last) {
491 _debug("short data");
492 goto unmarshal_error;
493 }
494 break;
492 case 0: 495 case 0:
493 if (last && 496 ASSERT(last);
494 call->state == AFS_CALL_AWAIT_REPLY) 497 if (call->state == AFS_CALL_AWAIT_REPLY)
495 call->state = AFS_CALL_COMPLETE; 498 call->state = AFS_CALL_COMPLETE;
496 break; 499 break;
497 case -ENOTCONN: 500 case -ENOTCONN:
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call)
501 abort_code = RX_INVALID_OPERATION; 504 abort_code = RX_INVALID_OPERATION;
502 goto do_abort; 505 goto do_abort;
503 default: 506 default:
507 unmarshal_error:
504 abort_code = RXGEN_CC_UNMARSHAL; 508 abort_code = RXGEN_CC_UNMARSHAL;
505 if (call->state != AFS_CALL_AWAIT_REPLY) 509 if (call->state != AFS_CALL_AWAIT_REPLY)
506 abort_code = RXGEN_SS_UNMARSHAL; 510 abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call)
511 call->state = AFS_CALL_ERROR; 515 call->state = AFS_CALL_ERROR;
512 break; 516 break;
513 } 517 }
514 afs_data_delivered(skb); 518 break;
515 skb = NULL;
516 continue;
517 case RXRPC_SKB_MARK_FINAL_ACK: 519 case RXRPC_SKB_MARK_FINAL_ACK:
518 _debug("Rcv ACK"); 520 _debug("Rcv ACK");
519 call->state = AFS_CALL_COMPLETE; 521 call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call)
685} 687}
686 688
687/* 689/*
688 * empty a socket buffer into a flat reply buffer 690 * Empty a socket buffer into a flat reply buffer.
689 */ 691 */
690void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) 692int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
691{ 693{
692 size_t len = skb->len; 694 size_t len = skb->len;
693 695
694 if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0) 696 if (len > call->reply_max - call->reply_size) {
695 BUG(); 697 _leave(" = -EBADMSG [%zu > %u]",
696 call->reply_size += len; 698 len, call->reply_max - call->reply_size);
699 return -EBADMSG;
700 }
701
702 if (len > 0) {
703 if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
704 len) < 0)
705 BUG();
706 call->reply_size += len;
707 }
708
709 afs_data_consumed(call, skb);
710 if (!last)
711 return -EAGAIN;
712
713 if (call->reply_size != call->reply_max) {
714 _leave(" = -EBADMSG [%u != %u]",
715 call->reply_size, call->reply_max);
716 return -EBADMSG;
717 }
718 return 0;
697} 719}
698 720
699/* 721/*
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
745} 767}
746 768
747/* 769/*
748 * grab the operation ID from an incoming cache manager call 770 * Grab the operation ID from an incoming cache manager call. The socket
771 * buffer is discarded on error or if we don't yet have sufficient data.
749 */ 772 */
750static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, 773static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
751 bool last) 774 bool last)
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
766 call->offset += len; 789 call->offset += len;
767 790
768 if (call->offset < 4) { 791 if (call->offset < 4) {
769 if (last) { 792 afs_data_consumed(call, skb);
770 _leave(" = -EBADMSG [op ID short]"); 793 _leave(" = -EAGAIN");
771 return -EBADMSG; 794 return -EAGAIN;
772 }
773 _leave(" = 0 [incomplete]");
774 return 0;
775 } 795 }
776 796
777 call->state = AFS_CALL_AWAIT_REQUEST; 797 call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
855} 875}
856 876
857/* 877/*
858 * extract a piece of data from the received data socket buffers 878 * Extract a piece of data from the received data socket buffers.
859 */ 879 */
860int afs_extract_data(struct afs_call *call, struct sk_buff *skb, 880int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
861 bool last, void *buf, size_t count) 881 bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
873 call->offset += len; 893 call->offset += len;
874 894
875 if (call->offset < count) { 895 if (call->offset < count) {
876 if (last) { 896 afs_data_consumed(call, skb);
877 _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
878 return -EBADMSG;
879 }
880 _leave(" = -EAGAIN"); 897 _leave(" = -EAGAIN");
881 return -EAGAIN; 898 return -EAGAIN;
882 } 899 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0cd182..f94d1abdc3eb 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
64 struct afs_cache_vlocation *entry; 64 struct afs_cache_vlocation *entry;
65 __be32 *bp; 65 __be32 *bp;
66 u32 tmp; 66 u32 tmp;
67 int loop; 67 int loop, ret;
68 68
69 _enter(",,%u", last); 69 _enter(",,%u", last);
70 70
71 afs_transfer_reply(call, skb); 71 ret = afs_transfer_reply(call, skb, last);
72 if (!last) 72 if (ret < 0)
73 return 0; 73 return ret;
74
75 if (call->reply_size != call->reply_max)
76 return -EBADMSG;
77 74
78 /* unmarshall the reply once we've received all of it */ 75 /* unmarshall the reply once we've received all of it */
79 entry = call->reply; 76 entry = call->reply;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c3cdde87cc8c..08ae99343d92 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
249 * thaw_bdev drops it. 249 * thaw_bdev drops it.
250 */ 250 */
251 sb = get_super(bdev); 251 sb = get_super(bdev);
252 drop_super(sb); 252 if (sb)
253 drop_super(sb);
253 mutex_unlock(&bdev->bd_fsfreeze_mutex); 254 mutex_unlock(&bdev->bd_fsfreeze_mutex);
254 return sb; 255 return sb;
255 } 256 }
@@ -646,7 +647,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type,
646{ 647{
647 struct dentry *dent; 648 struct dentry *dent;
648 dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); 649 dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
649 if (dent) 650 if (!IS_ERR(dent))
650 dent->d_sb->s_iflags |= SB_I_CGROUPWB; 651 dent->d_sb->s_iflags |= SB_I_CGROUPWB;
651 return dent; 652 return dent;
652} 653}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2b88439c2ee8..455a6b2fd539 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
589 589
590 list_del(&ref2->list); 590 list_del(&ref2->list);
591 kmem_cache_free(btrfs_prelim_ref_cache, ref2); 591 kmem_cache_free(btrfs_prelim_ref_cache, ref2);
592 cond_resched();
592 } 593 }
593 594
594 } 595 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2fe8f89091a3..eff3993c77b3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1028,6 +1028,7 @@ struct btrfs_fs_info {
1028 struct btrfs_workqueue *qgroup_rescan_workers; 1028 struct btrfs_workqueue *qgroup_rescan_workers;
1029 struct completion qgroup_rescan_completion; 1029 struct completion qgroup_rescan_completion;
1030 struct btrfs_work qgroup_rescan_work; 1030 struct btrfs_work qgroup_rescan_work;
1031 bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
1031 1032
1032 /* filesystem state */ 1033 /* filesystem state */
1033 unsigned long fs_state; 1034 unsigned long fs_state;
@@ -1079,6 +1080,8 @@ struct btrfs_fs_info {
1079 struct list_head pinned_chunks; 1080 struct list_head pinned_chunks;
1080 1081
1081 int creating_free_space_tree; 1082 int creating_free_space_tree;
1083 /* Used to record internally whether fs has been frozen */
1084 int fs_frozen;
1082}; 1085};
1083 1086
1084struct btrfs_subvolume_writers { 1087struct btrfs_subvolume_writers {
@@ -2578,7 +2581,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
2578 struct btrfs_root *root, 2581 struct btrfs_root *root,
2579 u64 root_objectid, u64 owner, u64 offset, 2582 u64 root_objectid, u64 owner, u64 offset,
2580 struct btrfs_key *ins); 2583 struct btrfs_key *ins);
2581int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 2584int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
2582 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 2585 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
2583 struct btrfs_key *ins, int is_data, int delalloc); 2586 struct btrfs_key *ins, int is_data, int delalloc);
2584int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2587int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b6d210e7a993..ac02e041464b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
541 struct btrfs_delayed_ref_head *existing; 541 struct btrfs_delayed_ref_head *existing;
542 struct btrfs_delayed_ref_head *head_ref = NULL; 542 struct btrfs_delayed_ref_head *head_ref = NULL;
543 struct btrfs_delayed_ref_root *delayed_refs; 543 struct btrfs_delayed_ref_root *delayed_refs;
544 struct btrfs_qgroup_extent_record *qexisting;
545 int count_mod = 1; 544 int count_mod = 1;
546 int must_insert_reserved = 0; 545 int must_insert_reserved = 0;
547 546
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
606 qrecord->num_bytes = num_bytes; 605 qrecord->num_bytes = num_bytes;
607 qrecord->old_roots = NULL; 606 qrecord->old_roots = NULL;
608 607
609 qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, 608 if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
610 delayed_refs, 609 delayed_refs, qrecord))
611 qrecord);
612 if (qexisting)
613 kfree(qrecord); 610 kfree(qrecord);
614 } 611 }
615 612
@@ -862,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
862 return 0; 859 return 0;
863} 860}
864 861
865int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
866 struct btrfs_trans_handle *trans,
867 u64 ref_root, u64 bytenr, u64 num_bytes)
868{
869 struct btrfs_delayed_ref_root *delayed_refs;
870 struct btrfs_delayed_ref_head *ref_head;
871 int ret = 0;
872
873 if (!fs_info->quota_enabled || !is_fstree(ref_root))
874 return 0;
875
876 delayed_refs = &trans->transaction->delayed_refs;
877
878 spin_lock(&delayed_refs->lock);
879 ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
880 if (!ref_head) {
881 ret = -ENOENT;
882 goto out;
883 }
884 WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
885 ref_head->qgroup_ref_root = ref_root;
886 ref_head->qgroup_reserved = num_bytes;
887out:
888 spin_unlock(&delayed_refs->lock);
889 return ret;
890}
891
892int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 862int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
893 struct btrfs_trans_handle *trans, 863 struct btrfs_trans_handle *trans,
894 u64 bytenr, u64 num_bytes, 864 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5fca9534a271..43f3629760e9 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
250 u64 parent, u64 ref_root, 250 u64 parent, u64 ref_root,
251 u64 owner, u64 offset, u64 reserved, int action, 251 u64 owner, u64 offset, u64 reserved, int action,
252 struct btrfs_delayed_extent_op *extent_op); 252 struct btrfs_delayed_extent_op *extent_op);
253int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
254 struct btrfs_trans_handle *trans,
255 u64 ref_root, u64 bytenr, u64 num_bytes);
256int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 253int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
257 struct btrfs_trans_handle *trans, 254 struct btrfs_trans_handle *trans,
258 u64 bytenr, u64 num_bytes, 255 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59febfb8d04a..54bc8c7c6bcd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
559 u32 nritems = btrfs_header_nritems(leaf); 559 u32 nritems = btrfs_header_nritems(leaf);
560 int slot; 560 int slot;
561 561
562 if (nritems == 0) 562 if (nritems == 0) {
563 struct btrfs_root *check_root;
564
565 key.objectid = btrfs_header_owner(leaf);
566 key.type = BTRFS_ROOT_ITEM_KEY;
567 key.offset = (u64)-1;
568
569 check_root = btrfs_get_fs_root(root->fs_info, &key, false);
570 /*
571 * The only reason we also check NULL here is that during
572 * open_ctree() some roots has not yet been set up.
573 */
574 if (!IS_ERR_OR_NULL(check_root)) {
575 /* if leaf is the root, then it's fine */
576 if (leaf->start !=
577 btrfs_root_bytenr(&check_root->root_item)) {
578 CORRUPT("non-root leaf's nritems is 0",
579 leaf, root, 0);
580 return -EIO;
581 }
582 }
563 return 0; 583 return 0;
584 }
564 585
565 /* Check the 0 item */ 586 /* Check the 0 item */
566 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != 587 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
612 return 0; 633 return 0;
613} 634}
614 635
636static int check_node(struct btrfs_root *root, struct extent_buffer *node)
637{
638 unsigned long nr = btrfs_header_nritems(node);
639
640 if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
641 btrfs_crit(root->fs_info,
642 "corrupt node: block %llu root %llu nritems %lu",
643 node->start, root->objectid, nr);
644 return -EIO;
645 }
646 return 0;
647}
648
615static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, 649static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
616 u64 phy_offset, struct page *page, 650 u64 phy_offset, struct page *page,
617 u64 start, u64 end, int mirror) 651 u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
682 ret = -EIO; 716 ret = -EIO;
683 } 717 }
684 718
719 if (found_level > 0 && check_node(root, eb))
720 ret = -EIO;
721
685 if (!ret) 722 if (!ret)
686 set_extent_buffer_uptodate(eb); 723 set_extent_buffer_uptodate(eb);
687err: 724err:
@@ -1618,8 +1655,8 @@ fail:
1618 return ret; 1655 return ret;
1619} 1656}
1620 1657
1621static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1658struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1622 u64 root_id) 1659 u64 root_id)
1623{ 1660{
1624 struct btrfs_root *root; 1661 struct btrfs_root *root;
1625 1662
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2298 fs_info->quota_enabled = 0; 2335 fs_info->quota_enabled = 0;
2299 fs_info->pending_quota_state = 0; 2336 fs_info->pending_quota_state = 0;
2300 fs_info->qgroup_ulist = NULL; 2337 fs_info->qgroup_ulist = NULL;
2338 fs_info->qgroup_rescan_running = false;
2301 mutex_init(&fs_info->qgroup_rescan_lock); 2339 mutex_init(&fs_info->qgroup_rescan_lock);
2302} 2340}
2303 2341
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
2624 atomic_set(&fs_info->qgroup_op_seq, 0); 2662 atomic_set(&fs_info->qgroup_op_seq, 0);
2625 atomic_set(&fs_info->reada_works_cnt, 0); 2663 atomic_set(&fs_info->reada_works_cnt, 0);
2626 atomic64_set(&fs_info->tree_mod_seq, 0); 2664 atomic64_set(&fs_info->tree_mod_seq, 0);
2665 fs_info->fs_frozen = 0;
2627 fs_info->sb = sb; 2666 fs_info->sb = sb;
2628 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2667 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
2629 fs_info->metadata_ratio = 0; 2668 fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3739 if (btrfs_root_refs(&root->root_item) == 0) 3778 if (btrfs_root_refs(&root->root_item) == 0)
3740 synchronize_srcu(&fs_info->subvol_srcu); 3779 synchronize_srcu(&fs_info->subvol_srcu);
3741 3780
3742 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3781 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
3743 btrfs_free_log(NULL, root); 3782 btrfs_free_log(NULL, root);
3783 if (root->reloc_root) {
3784 free_extent_buffer(root->reloc_root->node);
3785 free_extent_buffer(root->reloc_root->commit_root);
3786 btrfs_put_fs_root(root->reloc_root);
3787 root->reloc_root = NULL;
3788 }
3789 }
3744 3790
3745 if (root->free_ino_pinned) 3791 if (root->free_ino_pinned)
3746 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3792 __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
3851 smp_mb(); 3897 smp_mb();
3852 3898
3853 /* wait for the qgroup rescan worker to stop */ 3899 /* wait for the qgroup rescan worker to stop */
3854 btrfs_qgroup_wait_for_completion(fs_info); 3900 btrfs_qgroup_wait_for_completion(fs_info, false);
3855 3901
3856 /* wait for the uuid_scan task to finish */ 3902 /* wait for the uuid_scan task to finish */
3857 down(&fs_info->uuid_tree_rescan_sem); 3903 down(&fs_info->uuid_tree_rescan_sem);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b3207a0e09f7..f19a982f5a4f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
68struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, 68struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
69 struct btrfs_key *location); 69 struct btrfs_key *location);
70int btrfs_init_fs_root(struct btrfs_root *root); 70int btrfs_init_fs_root(struct btrfs_root *root);
71struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
72 u64 root_id);
71int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, 73int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
72 struct btrfs_root *root); 74 struct btrfs_root *root);
73void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); 75void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 61b494e8e604..0450dc410533 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@ enum {
60 CHUNK_ALLOC_FORCE = 2, 60 CHUNK_ALLOC_FORCE = 2,
61}; 61};
62 62
63/*
64 * Control how reservations are dealt with.
65 *
66 * RESERVE_FREE - freeing a reservation.
67 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
68 * ENOSPC accounting
69 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
70 * bytes_may_use as the ENOSPC accounting is done elsewhere
71 */
72enum {
73 RESERVE_FREE = 0,
74 RESERVE_ALLOC = 1,
75 RESERVE_ALLOC_NO_ACCOUNT = 2,
76};
77
78static int update_block_group(struct btrfs_trans_handle *trans, 63static int update_block_group(struct btrfs_trans_handle *trans,
79 struct btrfs_root *root, u64 bytenr, 64 struct btrfs_root *root, u64 bytenr,
80 u64 num_bytes, int alloc); 65 u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level,
104 struct btrfs_key *key); 89 struct btrfs_key *key);
105static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 90static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups); 91 int dump_block_groups);
107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 92static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
108 u64 num_bytes, int reserve, 93 u64 ram_bytes, u64 num_bytes, int delalloc);
109 int delalloc); 94static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
95 u64 num_bytes, int delalloc);
110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 96static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
111 u64 num_bytes); 97 u64 num_bytes);
112int btrfs_pin_extent(struct btrfs_root *root, 98int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@ again:
3501 dcs = BTRFS_DC_SETUP; 3487 dcs = BTRFS_DC_SETUP;
3502 else if (ret == -ENOSPC) 3488 else if (ret == -ENOSPC)
3503 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); 3489 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3504 btrfs_free_reserved_data_space(inode, 0, num_pages);
3505 3490
3506out_put: 3491out_put:
3507 iput(inode); 3492 iput(inode);
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
4472 } 4457 }
4473} 4458}
4474 4459
4460/*
4461 * If force is CHUNK_ALLOC_FORCE:
4462 * - return 1 if it successfully allocates a chunk,
4463 * - return errors including -ENOSPC otherwise.
4464 * If force is NOT CHUNK_ALLOC_FORCE:
4465 * - return 0 if it doesn't need to allocate a new chunk,
4466 * - return 1 if it successfully allocates a chunk,
4467 * - return errors including -ENOSPC otherwise.
4468 */
4475static int do_chunk_alloc(struct btrfs_trans_handle *trans, 4469static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4476 struct btrfs_root *extent_root, u64 flags, int force) 4470 struct btrfs_root *extent_root, u64 flags, int force)
4477{ 4471{
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root,
4882 btrfs_get_alloc_profile(root, 0), 4876 btrfs_get_alloc_profile(root, 0),
4883 CHUNK_ALLOC_NO_FORCE); 4877 CHUNK_ALLOC_NO_FORCE);
4884 btrfs_end_transaction(trans, root); 4878 btrfs_end_transaction(trans, root);
4885 if (ret == -ENOSPC) 4879 if (ret > 0 || ret == -ENOSPC)
4886 ret = 0; 4880 ret = 0;
4887 break; 4881 break;
4888 case COMMIT_TRANS: 4882 case COMMIT_TRANS:
@@ -6497,19 +6491,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6497} 6491}
6498 6492
6499/** 6493/**
6500 * btrfs_update_reserved_bytes - update the block_group and space info counters 6494 * btrfs_add_reserved_bytes - update the block_group and space info counters
6501 * @cache: The cache we are manipulating 6495 * @cache: The cache we are manipulating
6496 * @ram_bytes: The number of bytes of file content, and will be same to
6497 * @num_bytes except for the compress path.
6502 * @num_bytes: The number of bytes in question 6498 * @num_bytes: The number of bytes in question
6503 * @reserve: One of the reservation enums
6504 * @delalloc: The blocks are allocated for the delalloc write 6499 * @delalloc: The blocks are allocated for the delalloc write
6505 * 6500 *
6506 * This is called by the allocator when it reserves space, or by somebody who is 6501 * This is called by the allocator when it reserves space. Metadata
6507 * freeing space that was never actually used on disk. For example if you 6502 * reservations should be called with RESERVE_ALLOC so we do the proper
6508 * reserve some space for a new leaf in transaction A and before transaction A
6509 * commits you free that leaf, you call this with reserve set to 0 in order to
6510 * clear the reservation.
6511 *
6512 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
6513 * ENOSPC accounting. For data we handle the reservation through clearing the 6503 * ENOSPC accounting. For data we handle the reservation through clearing the
6514 * delalloc bits in the io_tree. We have to do this since we could end up 6504 * delalloc bits in the io_tree. We have to do this since we could end up
6515 * allocating less disk space for the amount of data we have reserved in the 6505 * allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6509,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6519 * make the reservation and return -EAGAIN, otherwise this function always 6509 * make the reservation and return -EAGAIN, otherwise this function always
6520 * succeeds. 6510 * succeeds.
6521 */ 6511 */
6522static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 6512static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6523 u64 num_bytes, int reserve, int delalloc) 6513 u64 ram_bytes, u64 num_bytes, int delalloc)
6524{ 6514{
6525 struct btrfs_space_info *space_info = cache->space_info; 6515 struct btrfs_space_info *space_info = cache->space_info;
6526 int ret = 0; 6516 int ret = 0;
6527 6517
6528 spin_lock(&space_info->lock); 6518 spin_lock(&space_info->lock);
6529 spin_lock(&cache->lock); 6519 spin_lock(&cache->lock);
6530 if (reserve != RESERVE_FREE) { 6520 if (cache->ro) {
6531 if (cache->ro) { 6521 ret = -EAGAIN;
6532 ret = -EAGAIN;
6533 } else {
6534 cache->reserved += num_bytes;
6535 space_info->bytes_reserved += num_bytes;
6536 if (reserve == RESERVE_ALLOC) {
6537 trace_btrfs_space_reservation(cache->fs_info,
6538 "space_info", space_info->flags,
6539 num_bytes, 0);
6540 space_info->bytes_may_use -= num_bytes;
6541 }
6542
6543 if (delalloc)
6544 cache->delalloc_bytes += num_bytes;
6545 }
6546 } else { 6522 } else {
6547 if (cache->ro) 6523 cache->reserved += num_bytes;
6548 space_info->bytes_readonly += num_bytes; 6524 space_info->bytes_reserved += num_bytes;
6549 cache->reserved -= num_bytes;
6550 space_info->bytes_reserved -= num_bytes;
6551 6525
6526 trace_btrfs_space_reservation(cache->fs_info,
6527 "space_info", space_info->flags,
6528 ram_bytes, 0);
6529 space_info->bytes_may_use -= ram_bytes;
6552 if (delalloc) 6530 if (delalloc)
6553 cache->delalloc_bytes -= num_bytes; 6531 cache->delalloc_bytes += num_bytes;
6554 } 6532 }
6555 spin_unlock(&cache->lock); 6533 spin_unlock(&cache->lock);
6556 spin_unlock(&space_info->lock); 6534 spin_unlock(&space_info->lock);
6557 return ret; 6535 return ret;
6558} 6536}
6559 6537
6538/**
6539 * btrfs_free_reserved_bytes - update the block_group and space info counters
6540 * @cache: The cache we are manipulating
6541 * @num_bytes: The number of bytes in question
6542 * @delalloc: The blocks are allocated for the delalloc write
6543 *
6544 * This is called by somebody who is freeing space that was never actually used
6545 * on disk. For example if you reserve some space for a new leaf in transaction
6546 * A and before transaction A commits you free that leaf, you call this with
6547 * reserve set to 0 in order to clear the reservation.
6548 */
6549
6550static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6551 u64 num_bytes, int delalloc)
6552{
6553 struct btrfs_space_info *space_info = cache->space_info;
6554 int ret = 0;
6555
6556 spin_lock(&space_info->lock);
6557 spin_lock(&cache->lock);
6558 if (cache->ro)
6559 space_info->bytes_readonly += num_bytes;
6560 cache->reserved -= num_bytes;
6561 space_info->bytes_reserved -= num_bytes;
6562
6563 if (delalloc)
6564 cache->delalloc_bytes -= num_bytes;
6565 spin_unlock(&cache->lock);
6566 spin_unlock(&space_info->lock);
6567 return ret;
6568}
6560void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 6569void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
6561 struct btrfs_root *root) 6570 struct btrfs_root *root)
6562{ 6571{
@@ -7191,7 +7200,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7191 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 7200 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7192 7201
7193 btrfs_add_free_space(cache, buf->start, buf->len); 7202 btrfs_add_free_space(cache, buf->start, buf->len);
7194 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); 7203 btrfs_free_reserved_bytes(cache, buf->len, 0);
7195 btrfs_put_block_group(cache); 7204 btrfs_put_block_group(cache);
7196 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 7205 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
7197 pin = 0; 7206 pin = 0;
@@ -7416,9 +7425,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7416 * the free space extent currently. 7425 * the free space extent currently.
7417 */ 7426 */
7418static noinline int find_free_extent(struct btrfs_root *orig_root, 7427static noinline int find_free_extent(struct btrfs_root *orig_root,
7419 u64 num_bytes, u64 empty_size, 7428 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7420 u64 hint_byte, struct btrfs_key *ins, 7429 u64 hint_byte, struct btrfs_key *ins,
7421 u64 flags, int delalloc) 7430 u64 flags, int delalloc)
7422{ 7431{
7423 int ret = 0; 7432 int ret = 0;
7424 struct btrfs_root *root = orig_root->fs_info->extent_root; 7433 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7439,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
7430 struct btrfs_space_info *space_info; 7439 struct btrfs_space_info *space_info;
7431 int loop = 0; 7440 int loop = 0;
7432 int index = __get_raid_index(flags); 7441 int index = __get_raid_index(flags);
7433 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
7434 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
7435 bool failed_cluster_refill = false; 7442 bool failed_cluster_refill = false;
7436 bool failed_alloc = false; 7443 bool failed_alloc = false;
7437 bool use_cluster = true; 7444 bool use_cluster = true;
@@ -7763,8 +7770,8 @@ checks:
7763 search_start - offset); 7770 search_start - offset);
7764 BUG_ON(offset > search_start); 7771 BUG_ON(offset > search_start);
7765 7772
7766 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 7773 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7767 alloc_type, delalloc); 7774 num_bytes, delalloc);
7768 if (ret == -EAGAIN) { 7775 if (ret == -EAGAIN) {
7769 btrfs_add_free_space(block_group, offset, num_bytes); 7776 btrfs_add_free_space(block_group, offset, num_bytes);
7770 goto loop; 7777 goto loop;
@@ -7936,7 +7943,7 @@ again:
7936 up_read(&info->groups_sem); 7943 up_read(&info->groups_sem);
7937} 7944}
7938 7945
7939int btrfs_reserve_extent(struct btrfs_root *root, 7946int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
7940 u64 num_bytes, u64 min_alloc_size, 7947 u64 num_bytes, u64 min_alloc_size,
7941 u64 empty_size, u64 hint_byte, 7948 u64 empty_size, u64 hint_byte,
7942 struct btrfs_key *ins, int is_data, int delalloc) 7949 struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7955,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
7948 flags = btrfs_get_alloc_profile(root, is_data); 7955 flags = btrfs_get_alloc_profile(root, is_data);
7949again: 7956again:
7950 WARN_ON(num_bytes < root->sectorsize); 7957 WARN_ON(num_bytes < root->sectorsize);
7951 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 7958 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
7952 flags, delalloc); 7959 hint_byte, ins, flags, delalloc);
7953 if (!ret && !is_data) { 7960 if (!ret && !is_data) {
7954 btrfs_dec_block_group_reservations(root->fs_info, 7961 btrfs_dec_block_group_reservations(root->fs_info,
7955 ins->objectid); 7962 ins->objectid);
@@ -7958,6 +7965,7 @@ again:
7958 num_bytes = min(num_bytes >> 1, ins->offset); 7965 num_bytes = min(num_bytes >> 1, ins->offset);
7959 num_bytes = round_down(num_bytes, root->sectorsize); 7966 num_bytes = round_down(num_bytes, root->sectorsize);
7960 num_bytes = max(num_bytes, min_alloc_size); 7967 num_bytes = max(num_bytes, min_alloc_size);
7968 ram_bytes = num_bytes;
7961 if (num_bytes == min_alloc_size) 7969 if (num_bytes == min_alloc_size)
7962 final_tried = true; 7970 final_tried = true;
7963 goto again; 7971 goto again;
@@ -7995,7 +8003,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
7995 if (btrfs_test_opt(root->fs_info, DISCARD)) 8003 if (btrfs_test_opt(root->fs_info, DISCARD))
7996 ret = btrfs_discard_extent(root, start, len, NULL); 8004 ret = btrfs_discard_extent(root, start, len, NULL);
7997 btrfs_add_free_space(cache, start, len); 8005 btrfs_add_free_space(cache, start, len);
7998 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 8006 btrfs_free_reserved_bytes(cache, len, delalloc);
7999 trace_btrfs_reserved_extent_free(root, start, len); 8007 trace_btrfs_reserved_extent_free(root, start, len);
8000 } 8008 }
8001 8009
@@ -8223,8 +8231,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8223 if (!block_group) 8231 if (!block_group)
8224 return -EINVAL; 8232 return -EINVAL;
8225 8233
8226 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 8234 ret = btrfs_add_reserved_bytes(block_group, ins->offset,
8227 RESERVE_ALLOC_NO_ACCOUNT, 0); 8235 ins->offset, 0);
8228 BUG_ON(ret); /* logic error */ 8236 BUG_ON(ret); /* logic error */
8229 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 8237 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
8230 0, owner, offset, ins, 1); 8238 0, owner, offset, ins, 1);
@@ -8368,7 +8376,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8368 if (IS_ERR(block_rsv)) 8376 if (IS_ERR(block_rsv))
8369 return ERR_CAST(block_rsv); 8377 return ERR_CAST(block_rsv);
8370 8378
8371 ret = btrfs_reserve_extent(root, blocksize, blocksize, 8379 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8372 empty_size, hint, &ins, 0, 0); 8380 empty_size, hint, &ins, 0, 0);
8373 if (ret) 8381 if (ret)
8374 goto out_unuse; 8382 goto out_unuse;
@@ -8521,35 +8529,6 @@ reada:
8521 wc->reada_slot = slot; 8529 wc->reada_slot = slot;
8522} 8530}
8523 8531
8524/*
8525 * These may not be seen by the usual inc/dec ref code so we have to
8526 * add them here.
8527 */
8528static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
8529 struct btrfs_root *root, u64 bytenr,
8530 u64 num_bytes)
8531{
8532 struct btrfs_qgroup_extent_record *qrecord;
8533 struct btrfs_delayed_ref_root *delayed_refs;
8534
8535 qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
8536 if (!qrecord)
8537 return -ENOMEM;
8538
8539 qrecord->bytenr = bytenr;
8540 qrecord->num_bytes = num_bytes;
8541 qrecord->old_roots = NULL;
8542
8543 delayed_refs = &trans->transaction->delayed_refs;
8544 spin_lock(&delayed_refs->lock);
8545 if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
8546 delayed_refs, qrecord))
8547 kfree(qrecord);
8548 spin_unlock(&delayed_refs->lock);
8549
8550 return 0;
8551}
8552
8553static int account_leaf_items(struct btrfs_trans_handle *trans, 8532static int account_leaf_items(struct btrfs_trans_handle *trans,
8554 struct btrfs_root *root, 8533 struct btrfs_root *root,
8555 struct extent_buffer *eb) 8534 struct extent_buffer *eb)
@@ -8583,7 +8562,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
8583 8562
8584 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 8563 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8585 8564
8586 ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); 8565 ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
8566 bytenr, num_bytes, GFP_NOFS);
8587 if (ret) 8567 if (ret)
8588 return ret; 8568 return ret;
8589 } 8569 }
@@ -8732,8 +8712,9 @@ walk_down:
8732 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 8712 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
8733 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 8713 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
8734 8714
8735 ret = record_one_subtree_extent(trans, root, child_bytenr, 8715 ret = btrfs_qgroup_insert_dirty_extent(trans,
8736 root->nodesize); 8716 root->fs_info, child_bytenr,
8717 root->nodesize, GFP_NOFS);
8737 if (ret) 8718 if (ret)
8738 goto out; 8719 goto out;
8739 } 8720 }
@@ -9906,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root,
9906 } else { 9887 } else {
9907 ret = 0; 9888 ret = 0;
9908 } 9889 }
9890 free_extent_map(em);
9909 goto out; 9891 goto out;
9910 } 9892 }
9911 path->slots[0]++; 9893 path->slots[0]++;
@@ -9942,6 +9924,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9942 block_group->iref = 0; 9924 block_group->iref = 0;
9943 block_group->inode = NULL; 9925 block_group->inode = NULL;
9944 spin_unlock(&block_group->lock); 9926 spin_unlock(&block_group->lock);
9927 ASSERT(block_group->io_ctl.inode == NULL);
9945 iput(inode); 9928 iput(inode);
9946 last = block_group->key.objectid + block_group->key.offset; 9929 last = block_group->key.objectid + block_group->key.offset;
9947 btrfs_put_block_group(block_group); 9930 btrfs_put_block_group(block_group);
@@ -9999,6 +9982,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
9999 free_excluded_extents(info->extent_root, block_group); 9982 free_excluded_extents(info->extent_root, block_group);
10000 9983
10001 btrfs_remove_free_space_cache(block_group); 9984 btrfs_remove_free_space_cache(block_group);
9985 ASSERT(list_empty(&block_group->dirty_list));
9986 ASSERT(list_empty(&block_group->io_list));
9987 ASSERT(list_empty(&block_group->bg_list));
9988 ASSERT(atomic_read(&block_group->count) == 1);
10002 btrfs_put_block_group(block_group); 9989 btrfs_put_block_group(block_group);
10003 9990
10004 spin_lock(&info->block_group_cache_lock); 9991 spin_lock(&info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc2729a7612d..28cd88fccc7e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,6 +20,7 @@
20#define EXTENT_DAMAGED (1U << 14) 20#define EXTENT_DAMAGED (1U << 14)
21#define EXTENT_NORESERVE (1U << 15) 21#define EXTENT_NORESERVE (1U << 15)
22#define EXTENT_QGROUP_RESERVED (1U << 16) 22#define EXTENT_QGROUP_RESERVED (1U << 16)
23#define EXTENT_CLEAR_DATA_RESV (1U << 17)
23#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 24#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
24#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 25#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
25 26
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9404121fd5f7..fea31a4a6e36 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2033 */ 2033 */
2034 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 2034 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2035 &BTRFS_I(inode)->runtime_flags); 2035 &BTRFS_I(inode)->runtime_flags);
2036 /*
2037 * An ordered extent might have started before and completed
2038 * already with io errors, in which case the inode was not
2039 * updated and we end up here. So check the inode's mapping
2040 * flags for any errors that might have happened while doing
2041 * writeback of file data.
2042 */
2043 ret = btrfs_inode_check_errors(inode);
2036 inode_unlock(inode); 2044 inode_unlock(inode);
2037 goto out; 2045 goto out;
2038 } 2046 }
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2062 } 2070 }
2063 trans->sync = true; 2071 trans->sync = true;
2064 2072
2065 btrfs_init_log_ctx(&ctx); 2073 btrfs_init_log_ctx(&ctx, inode);
2066 2074
2067 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); 2075 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
2068 if (ret < 0) { 2076 if (ret < 0) {
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2667 2675
2668 alloc_start = round_down(offset, blocksize); 2676 alloc_start = round_down(offset, blocksize);
2669 alloc_end = round_up(offset + len, blocksize); 2677 alloc_end = round_up(offset + len, blocksize);
2678 cur_offset = alloc_start;
2670 2679
2671 /* Make sure we aren't being give some crap mode */ 2680 /* Make sure we aren't being give some crap mode */
2672 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 2681 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2759 2768
2760 /* First, check if we exceed the qgroup limit */ 2769 /* First, check if we exceed the qgroup limit */
2761 INIT_LIST_HEAD(&reserve_list); 2770 INIT_LIST_HEAD(&reserve_list);
2762 cur_offset = alloc_start;
2763 while (1) { 2771 while (1) {
2764 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 2772 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
2765 alloc_end - cur_offset, 0); 2773 alloc_end - cur_offset, 0);
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
2786 last_byte - cur_offset); 2794 last_byte - cur_offset);
2787 if (ret < 0) 2795 if (ret < 0)
2788 break; 2796 break;
2797 } else {
2798 /*
2799 * Do not need to reserve unwritten extent for this
2800 * range, free reserved data space first, otherwise
2801 * it'll result in false ENOSPC error.
2802 */
2803 btrfs_free_reserved_data_space(inode, cur_offset,
2804 last_byte - cur_offset);
2789 } 2805 }
2790 free_extent_map(em); 2806 free_extent_map(em);
2791 cur_offset = last_byte; 2807 cur_offset = last_byte;
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2803 range->start, 2819 range->start,
2804 range->len, 1 << inode->i_blkbits, 2820 range->len, 1 << inode->i_blkbits,
2805 offset + len, &alloc_hint); 2821 offset + len, &alloc_hint);
2822 else
2823 btrfs_free_reserved_data_space(inode, range->start,
2824 range->len);
2806 list_del(&range->list); 2825 list_del(&range->list);
2807 kfree(range); 2826 kfree(range);
2808 } 2827 }
@@ -2837,18 +2856,11 @@ out_unlock:
2837 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 2856 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
2838 &cached_state, GFP_KERNEL); 2857 &cached_state, GFP_KERNEL);
2839out: 2858out:
2840 /*
2841 * As we waited the extent range, the data_rsv_map must be empty
2842 * in the range, as written data range will be released from it.
2843 * And for prealloacted extent, it will also be released when
2844 * its metadata is written.
2845 * So this is completely used as cleanup.
2846 */
2847 btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
2848 inode_unlock(inode); 2859 inode_unlock(inode);
2849 /* Let go of our reservation. */ 2860 /* Let go of our reservation. */
2850 btrfs_free_reserved_data_space(inode, alloc_start, 2861 if (ret != 0)
2851 alloc_end - alloc_start); 2862 btrfs_free_reserved_data_space(inode, alloc_start,
2863 alloc_end - cur_offset);
2852 return ret; 2864 return ret;
2853} 2865}
2854 2866
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index aa6fabaee72e..359ee861b5a4 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -495,10 +495,9 @@ again:
495 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, 495 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
496 prealloc, prealloc, &alloc_hint); 496 prealloc, prealloc, &alloc_hint);
497 if (ret) { 497 if (ret) {
498 btrfs_delalloc_release_space(inode, 0, prealloc); 498 btrfs_delalloc_release_metadata(inode, prealloc);
499 goto out_put; 499 goto out_put;
500 } 500 }
501 btrfs_free_reserved_data_space(inode, 0, prealloc);
502 501
503 ret = btrfs_write_out_ino_cache(root, trans, path, inode); 502 ret = btrfs_write_out_ino_cache(root, trans, path, inode);
504out_put: 503out_put:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2f5975954ccf..e6811c42e41e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -566,6 +566,8 @@ cont:
566 PAGE_SET_WRITEBACK | 566 PAGE_SET_WRITEBACK |
567 page_error_op | 567 page_error_op |
568 PAGE_END_WRITEBACK); 568 PAGE_END_WRITEBACK);
569 btrfs_free_reserved_data_space_noquota(inode, start,
570 end - start + 1);
569 goto free_pages_out; 571 goto free_pages_out;
570 } 572 }
571 } 573 }
@@ -742,7 +744,7 @@ retry:
742 lock_extent(io_tree, async_extent->start, 744 lock_extent(io_tree, async_extent->start,
743 async_extent->start + async_extent->ram_size - 1); 745 async_extent->start + async_extent->ram_size - 1);
744 746
745 ret = btrfs_reserve_extent(root, 747 ret = btrfs_reserve_extent(root, async_extent->ram_size,
746 async_extent->compressed_size, 748 async_extent->compressed_size,
747 async_extent->compressed_size, 749 async_extent->compressed_size,
748 0, alloc_hint, &ins, 1, 1); 750 0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
969 EXTENT_DEFRAG, PAGE_UNLOCK | 971 EXTENT_DEFRAG, PAGE_UNLOCK |
970 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | 972 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
971 PAGE_END_WRITEBACK); 973 PAGE_END_WRITEBACK);
972 974 btrfs_free_reserved_data_space_noquota(inode, start,
975 end - start + 1);
973 *nr_written = *nr_written + 976 *nr_written = *nr_written +
974 (end - start + PAGE_SIZE) / PAGE_SIZE; 977 (end - start + PAGE_SIZE) / PAGE_SIZE;
975 *page_started = 1; 978 *page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
989 unsigned long op; 992 unsigned long op;
990 993
991 cur_alloc_size = disk_num_bytes; 994 cur_alloc_size = disk_num_bytes;
992 ret = btrfs_reserve_extent(root, cur_alloc_size, 995 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
993 root->sectorsize, 0, alloc_hint, 996 root->sectorsize, 0, alloc_hint,
994 &ins, 1, 1); 997 &ins, 1, 1);
995 if (ret < 0) 998 if (ret < 0)
@@ -1489,8 +1492,10 @@ out_check:
1489 extent_clear_unlock_delalloc(inode, cur_offset, 1492 extent_clear_unlock_delalloc(inode, cur_offset,
1490 cur_offset + num_bytes - 1, 1493 cur_offset + num_bytes - 1,
1491 locked_page, EXTENT_LOCKED | 1494 locked_page, EXTENT_LOCKED |
1492 EXTENT_DELALLOC, PAGE_UNLOCK | 1495 EXTENT_DELALLOC |
1493 PAGE_SET_PRIVATE2); 1496 EXTENT_CLEAR_DATA_RESV,
1497 PAGE_UNLOCK | PAGE_SET_PRIVATE2);
1498
1494 if (!nolock && nocow) 1499 if (!nolock && nocow)
1495 btrfs_end_write_no_snapshoting(root); 1500 btrfs_end_write_no_snapshoting(root);
1496 cur_offset = extent_end; 1501 cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1807 return; 1812 return;
1808 1813
1809 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1814 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1810 && do_list && !(state->state & EXTENT_NORESERVE)) 1815 && do_list && !(state->state & EXTENT_NORESERVE)
1816 && (*bits & (EXTENT_DO_ACCOUNTING |
1817 EXTENT_CLEAR_DATA_RESV)))
1811 btrfs_free_reserved_data_space_noquota(inode, 1818 btrfs_free_reserved_data_space_noquota(inode,
1812 state->start, len); 1819 state->start, len);
1813 1820
@@ -3435,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3435 found_key.offset = 0; 3442 found_key.offset = 0;
3436 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 3443 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
3437 ret = PTR_ERR_OR_ZERO(inode); 3444 ret = PTR_ERR_OR_ZERO(inode);
3438 if (ret && ret != -ESTALE) 3445 if (ret && ret != -ENOENT)
3439 goto out; 3446 goto out;
3440 3447
3441 if (ret == -ESTALE && root == root->fs_info->tree_root) { 3448 if (ret == -ENOENT && root == root->fs_info->tree_root) {
3442 struct btrfs_root *dead_root; 3449 struct btrfs_root *dead_root;
3443 struct btrfs_fs_info *fs_info = root->fs_info; 3450 struct btrfs_fs_info *fs_info = root->fs_info;
3444 int is_dead_root = 0; 3451 int is_dead_root = 0;
@@ -3474,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3474 * Inode is already gone but the orphan item is still there, 3481 * Inode is already gone but the orphan item is still there,
3475 * kill the orphan item. 3482 * kill the orphan item.
3476 */ 3483 */
3477 if (ret == -ESTALE) { 3484 if (ret == -ENOENT) {
3478 trans = btrfs_start_transaction(root, 1); 3485 trans = btrfs_start_transaction(root, 1);
3479 if (IS_ERR(trans)) { 3486 if (IS_ERR(trans)) {
3480 ret = PTR_ERR(trans); 3487 ret = PTR_ERR(trans);
@@ -3633,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3633/* 3640/*
3634 * read an inode from the btree into the in-memory inode 3641 * read an inode from the btree into the in-memory inode
3635 */ 3642 */
3636static void btrfs_read_locked_inode(struct inode *inode) 3643static int btrfs_read_locked_inode(struct inode *inode)
3637{ 3644{
3638 struct btrfs_path *path; 3645 struct btrfs_path *path;
3639 struct extent_buffer *leaf; 3646 struct extent_buffer *leaf;
@@ -3652,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
3652 filled = true; 3659 filled = true;
3653 3660
3654 path = btrfs_alloc_path(); 3661 path = btrfs_alloc_path();
3655 if (!path) 3662 if (!path) {
3663 ret = -ENOMEM;
3656 goto make_bad; 3664 goto make_bad;
3665 }
3657 3666
3658 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3667 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3659 3668
3660 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3669 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3661 if (ret) 3670 if (ret) {
3671 if (ret > 0)
3672 ret = -ENOENT;
3662 goto make_bad; 3673 goto make_bad;
3674 }
3663 3675
3664 leaf = path->nodes[0]; 3676 leaf = path->nodes[0];
3665 3677
@@ -3812,11 +3824,12 @@ cache_acl:
3812 } 3824 }
3813 3825
3814 btrfs_update_iflags(inode); 3826 btrfs_update_iflags(inode);
3815 return; 3827 return 0;
3816 3828
3817make_bad: 3829make_bad:
3818 btrfs_free_path(path); 3830 btrfs_free_path(path);
3819 make_bad_inode(inode); 3831 make_bad_inode(inode);
3832 return ret;
3820} 3833}
3821 3834
3822/* 3835/*
@@ -4204,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4204 int err = 0; 4217 int err = 0;
4205 struct btrfs_root *root = BTRFS_I(dir)->root; 4218 struct btrfs_root *root = BTRFS_I(dir)->root;
4206 struct btrfs_trans_handle *trans; 4219 struct btrfs_trans_handle *trans;
4220 u64 last_unlink_trans;
4207 4221
4208 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) 4222 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4209 return -ENOTEMPTY; 4223 return -ENOTEMPTY;
@@ -4226,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4226 if (err) 4240 if (err)
4227 goto out; 4241 goto out;
4228 4242
4243 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4244
4229 /* now the directory is empty */ 4245 /* now the directory is empty */
4230 err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), 4246 err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
4231 dentry->d_name.name, dentry->d_name.len); 4247 dentry->d_name.name, dentry->d_name.len);
4232 if (!err) 4248 if (!err) {
4233 btrfs_i_size_write(inode, 0); 4249 btrfs_i_size_write(inode, 0);
4250 /*
4251 * Propagate the last_unlink_trans value of the deleted dir to
4252 * its parent directory. This is to prevent an unrecoverable
4253 * log tree in the case we do something like this:
4254 * 1) create dir foo
4255 * 2) create snapshot under dir foo
4256 * 3) delete the snapshot
4257 * 4) rmdir foo
4258 * 5) mkdir foo
4259 * 6) fsync foo or some file inside foo
4260 */
4261 if (last_unlink_trans >= trans->transid)
4262 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4263 }
4234out: 4264out:
4235 btrfs_end_transaction(trans, root); 4265 btrfs_end_transaction(trans, root);
4236 btrfs_btree_balance_dirty(root); 4266 btrfs_btree_balance_dirty(root);
@@ -5606,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5606 return ERR_PTR(-ENOMEM); 5636 return ERR_PTR(-ENOMEM);
5607 5637
5608 if (inode->i_state & I_NEW) { 5638 if (inode->i_state & I_NEW) {
5609 btrfs_read_locked_inode(inode); 5639 int ret;
5640
5641 ret = btrfs_read_locked_inode(inode);
5610 if (!is_bad_inode(inode)) { 5642 if (!is_bad_inode(inode)) {
5611 inode_tree_add(inode); 5643 inode_tree_add(inode);
5612 unlock_new_inode(inode); 5644 unlock_new_inode(inode);
@@ -5615,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5615 } else { 5647 } else {
5616 unlock_new_inode(inode); 5648 unlock_new_inode(inode);
5617 iput(inode); 5649 iput(inode);
5618 inode = ERR_PTR(-ESTALE); 5650 ASSERT(ret < 0);
5651 inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
5619 } 5652 }
5620 } 5653 }
5621 5654
@@ -7225,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7225 int ret; 7258 int ret;
7226 7259
7227 alloc_hint = get_extent_allocation_hint(inode, start, len); 7260 alloc_hint = get_extent_allocation_hint(inode, start, len);
7228 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 7261 ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
7229 alloc_hint, &ins, 1, 1); 7262 alloc_hint, &ins, 1, 1);
7230 if (ret) 7263 if (ret)
7231 return ERR_PTR(ret); 7264 return ERR_PTR(ret);
@@ -7725,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7725 ret = PTR_ERR(em2); 7758 ret = PTR_ERR(em2);
7726 goto unlock_err; 7759 goto unlock_err;
7727 } 7760 }
7761 /*
7762 * For inode marked NODATACOW or extent marked PREALLOC,
7763 * use the existing or preallocated extent, so does not
7764 * need to adjust btrfs_space_info's bytes_may_use.
7765 */
7766 btrfs_free_reserved_data_space_noquota(inode,
7767 start, len);
7728 goto unlock; 7768 goto unlock;
7729 } 7769 }
7730 } 7770 }
@@ -7759,7 +7799,6 @@ unlock:
7759 i_size_write(inode, start + len); 7799 i_size_write(inode, start + len);
7760 7800
7761 adjust_dio_outstanding_extents(inode, dio_data, len); 7801 adjust_dio_outstanding_extents(inode, dio_data, len);
7762 btrfs_free_reserved_data_space(inode, start, len);
7763 WARN_ON(dio_data->reserve < len); 7802 WARN_ON(dio_data->reserve < len);
7764 dio_data->reserve -= len; 7803 dio_data->reserve -= len;
7765 dio_data->unsubmitted_oe_range_end = start + len; 7804 dio_data->unsubmitted_oe_range_end = start + len;
@@ -10280,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
10280 u64 last_alloc = (u64)-1; 10319 u64 last_alloc = (u64)-1;
10281 int ret = 0; 10320 int ret = 0;
10282 bool own_trans = true; 10321 bool own_trans = true;
10322 u64 end = start + num_bytes - 1;
10283 10323
10284 if (trans) 10324 if (trans)
10285 own_trans = false; 10325 own_trans = false;
@@ -10301,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
10301 * sized chunks. 10341 * sized chunks.
10302 */ 10342 */
10303 cur_bytes = min(cur_bytes, last_alloc); 10343 cur_bytes = min(cur_bytes, last_alloc);
10304 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 10344 ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
10305 *alloc_hint, &ins, 1, 0); 10345 min_size, 0, *alloc_hint, &ins, 1, 0);
10306 if (ret) { 10346 if (ret) {
10307 if (own_trans) 10347 if (own_trans)
10308 btrfs_end_transaction(trans, root); 10348 btrfs_end_transaction(trans, root);
@@ -10388,6 +10428,9 @@ next:
10388 if (own_trans) 10428 if (own_trans)
10389 btrfs_end_transaction(trans, root); 10429 btrfs_end_transaction(trans, root);
10390 } 10430 }
10431 if (cur_offset < end)
10432 btrfs_free_reserved_data_space(inode, cur_offset,
10433 end - cur_offset + 1);
10391 return ret; 10434 return ret;
10392} 10435}
10393 10436
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 14ed1e9e6bc8..b2a2da5893af 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
5084 if (!capable(CAP_SYS_ADMIN)) 5084 if (!capable(CAP_SYS_ADMIN))
5085 return -EPERM; 5085 return -EPERM;
5086 5086
5087 return btrfs_qgroup_wait_for_completion(root->fs_info); 5087 return btrfs_qgroup_wait_for_completion(root->fs_info, true);
5088} 5088}
5089 5089
5090static long _btrfs_ioctl_set_received_subvol(struct file *file, 5090static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93ee1c18ef9d..8db2e29fdcf4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
995 goto out; 995 goto out;
996 fs_info->quota_enabled = 0; 996 fs_info->quota_enabled = 0;
997 fs_info->pending_quota_state = 0; 997 fs_info->pending_quota_state = 0;
998 btrfs_qgroup_wait_for_completion(fs_info); 998 btrfs_qgroup_wait_for_completion(fs_info, false);
999 spin_lock(&fs_info->qgroup_lock); 999 spin_lock(&fs_info->qgroup_lock);
1000 quota_root = fs_info->quota_root; 1000 quota_root = fs_info->quota_root;
1001 fs_info->quota_root = NULL; 1001 fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
1453 return ret; 1453 return ret;
1454} 1454}
1455 1455
1456struct btrfs_qgroup_extent_record * 1456int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
1457btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 1457 struct btrfs_delayed_ref_root *delayed_refs,
1458 struct btrfs_delayed_ref_root *delayed_refs, 1458 struct btrfs_qgroup_extent_record *record)
1459 struct btrfs_qgroup_extent_record *record)
1460{ 1459{
1461 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1460 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1462 struct rb_node *parent_node = NULL; 1461 struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
1475 else if (bytenr > entry->bytenr) 1474 else if (bytenr > entry->bytenr)
1476 p = &(*p)->rb_right; 1475 p = &(*p)->rb_right;
1477 else 1476 else
1478 return entry; 1477 return 1;
1479 } 1478 }
1480 1479
1481 rb_link_node(&record->node, parent_node, p); 1480 rb_link_node(&record->node, parent_node, p);
1482 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1481 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
1483 return NULL; 1482 return 0;
1483}
1484
1485int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
1486 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
1487 gfp_t gfp_flag)
1488{
1489 struct btrfs_qgroup_extent_record *record;
1490 struct btrfs_delayed_ref_root *delayed_refs;
1491 int ret;
1492
1493 if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
1494 return 0;
1495 if (WARN_ON(trans == NULL))
1496 return -EINVAL;
1497 record = kmalloc(sizeof(*record), gfp_flag);
1498 if (!record)
1499 return -ENOMEM;
1500
1501 delayed_refs = &trans->transaction->delayed_refs;
1502 record->bytenr = bytenr;
1503 record->num_bytes = num_bytes;
1504 record->old_roots = NULL;
1505
1506 spin_lock(&delayed_refs->lock);
1507 ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
1508 record);
1509 spin_unlock(&delayed_refs->lock);
1510 if (ret > 0)
1511 kfree(record);
1512 return 0;
1484} 1513}
1485 1514
1486#define UPDATE_NEW 0 1515#define UPDATE_NEW 0
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2303 int err = -ENOMEM; 2332 int err = -ENOMEM;
2304 int ret = 0; 2333 int ret = 0;
2305 2334
2335 mutex_lock(&fs_info->qgroup_rescan_lock);
2336 fs_info->qgroup_rescan_running = true;
2337 mutex_unlock(&fs_info->qgroup_rescan_lock);
2338
2306 path = btrfs_alloc_path(); 2339 path = btrfs_alloc_path();
2307 if (!path) 2340 if (!path)
2308 goto out; 2341 goto out;
@@ -2369,6 +2402,9 @@ out:
2369 } 2402 }
2370 2403
2371done: 2404done:
2405 mutex_lock(&fs_info->qgroup_rescan_lock);
2406 fs_info->qgroup_rescan_running = false;
2407 mutex_unlock(&fs_info->qgroup_rescan_lock);
2372 complete_all(&fs_info->qgroup_rescan_completion); 2408 complete_all(&fs_info->qgroup_rescan_completion);
2373} 2409}
2374 2410
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2487 return 0; 2523 return 0;
2488} 2524}
2489 2525
2490int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2526int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
2527 bool interruptible)
2491{ 2528{
2492 int running; 2529 int running;
2493 int ret = 0; 2530 int ret = 0;
2494 2531
2495 mutex_lock(&fs_info->qgroup_rescan_lock); 2532 mutex_lock(&fs_info->qgroup_rescan_lock);
2496 spin_lock(&fs_info->qgroup_lock); 2533 spin_lock(&fs_info->qgroup_lock);
2497 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2534 running = fs_info->qgroup_rescan_running;
2498 spin_unlock(&fs_info->qgroup_lock); 2535 spin_unlock(&fs_info->qgroup_lock);
2499 mutex_unlock(&fs_info->qgroup_rescan_lock); 2536 mutex_unlock(&fs_info->qgroup_rescan_lock);
2500 2537
2501 if (running) 2538 if (!running)
2539 return 0;
2540
2541 if (interruptible)
2502 ret = wait_for_completion_interruptible( 2542 ret = wait_for_completion_interruptible(
2503 &fs_info->qgroup_rescan_completion); 2543 &fs_info->qgroup_rescan_completion);
2544 else
2545 wait_for_completion(&fs_info->qgroup_rescan_completion);
2504 2546
2505 return ret; 2547 return ret;
2506} 2548}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 710887c06aaf..1bc64c864b62 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
46 struct btrfs_fs_info *fs_info); 46 struct btrfs_fs_info *fs_info);
47int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 47int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
48void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 48void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
49int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); 49int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
50 bool interruptible);
50int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 51int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
51 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 52 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
52int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 53int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
63struct btrfs_delayed_extent_op; 64struct btrfs_delayed_extent_op;
64int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 65int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
65 struct btrfs_fs_info *fs_info); 66 struct btrfs_fs_info *fs_info);
66struct btrfs_qgroup_extent_record * 67/*
67btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 68 * Insert one dirty extent record into @delayed_refs, informing qgroup to
68 struct btrfs_delayed_ref_root *delayed_refs, 69 * account that extent at commit trans time.
69 struct btrfs_qgroup_extent_record *record); 70 *
71 * No lock version, caller must acquire delayed ref lock and allocate memory.
72 *
73 * Return 0 for success insert
74 * Return >0 for existing record, caller can free @record safely.
75 * Error is not possible
76 */
77int btrfs_qgroup_insert_dirty_extent_nolock(
78 struct btrfs_fs_info *fs_info,
79 struct btrfs_delayed_ref_root *delayed_refs,
80 struct btrfs_qgroup_extent_record *record);
81
82/*
83 * Insert one dirty extent record into @delayed_refs, informing qgroup to
84 * account that extent at commit trans time.
85 *
86 * Better encapsulated version.
87 *
88 * Return 0 if the operation is done.
89 * Return <0 for error, like memory allocation failure or invalid parameter
90 * (NULL trans)
91 */
92int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
94 gfp_t gfp_flag);
95
70int 96int
71btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 97btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
72 struct btrfs_fs_info *fs_info, 98 struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5aea41b4..8a2c2a07987b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h" 33#include "inode-map.h"
34#include "qgroup.h"
34 35
35/* 36/*
36 * backref_node, mapping_node and tree_block start with this 37 * backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
3037 u64 num_bytes; 3038 u64 num_bytes;
3038 int nr = 0; 3039 int nr = 0;
3039 int ret = 0; 3040 int ret = 0;
3041 u64 prealloc_start = cluster->start - offset;
3042 u64 prealloc_end = cluster->end - offset;
3043 u64 cur_offset;
3040 3044
3041 BUG_ON(cluster->start != cluster->boundary[0]); 3045 BUG_ON(cluster->start != cluster->boundary[0]);
3042 inode_lock(inode); 3046 inode_lock(inode);
3043 3047
3044 ret = btrfs_check_data_free_space(inode, cluster->start, 3048 ret = btrfs_check_data_free_space(inode, prealloc_start,
3045 cluster->end + 1 - cluster->start); 3049 prealloc_end + 1 - prealloc_start);
3046 if (ret) 3050 if (ret)
3047 goto out; 3051 goto out;
3048 3052
3053 cur_offset = prealloc_start;
3049 while (nr < cluster->nr) { 3054 while (nr < cluster->nr) {
3050 start = cluster->boundary[nr] - offset; 3055 start = cluster->boundary[nr] - offset;
3051 if (nr + 1 < cluster->nr) 3056 if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
3055 3060
3056 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3061 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
3057 num_bytes = end + 1 - start; 3062 num_bytes = end + 1 - start;
3063 if (cur_offset < start)
3064 btrfs_free_reserved_data_space(inode, cur_offset,
3065 start - cur_offset);
3058 ret = btrfs_prealloc_file_range(inode, 0, start, 3066 ret = btrfs_prealloc_file_range(inode, 0, start,
3059 num_bytes, num_bytes, 3067 num_bytes, num_bytes,
3060 end + 1, &alloc_hint); 3068 end + 1, &alloc_hint);
3069 cur_offset = end + 1;
3061 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3070 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
3062 if (ret) 3071 if (ret)
3063 break; 3072 break;
3064 nr++; 3073 nr++;
3065 } 3074 }
3066 btrfs_free_reserved_data_space(inode, cluster->start, 3075 if (cur_offset < prealloc_end)
3067 cluster->end + 1 - cluster->start); 3076 btrfs_free_reserved_data_space(inode, cur_offset,
3077 prealloc_end + 1 - cur_offset);
3068out: 3078out:
3069 inode_unlock(inode); 3079 inode_unlock(inode);
3070 return ret; 3080 return ret;
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
3916 return 0; 3926 return 0;
3917} 3927}
3918 3928
3929/*
3930 * Qgroup fixer for data chunk relocation.
3931 * The data relocation is done in the following steps
3932 * 1) Copy data extents into data reloc tree
3933 * 2) Create tree reloc tree(special snapshot) for related subvolumes
3934 * 3) Modify file extents in tree reloc tree
3935 * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
3936 *
3937 * The problem is, data and tree reloc tree are not accounted to qgroup,
3938 * and 4) will only info qgroup to track tree blocks change, not file extents
3939 * in the tree blocks.
3940 *
3941 * The good news is, related data extents are all in data reloc tree, so we
3942 * only need to info qgroup to track all file extents in data reloc tree
3943 * before commit trans.
3944 */
3945static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
3946 struct reloc_control *rc)
3947{
3948 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3949 struct inode *inode = rc->data_inode;
3950 struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
3951 struct btrfs_path *path;
3952 struct btrfs_key key;
3953 int ret = 0;
3954
3955 if (!fs_info->quota_enabled)
3956 return 0;
3957
3958 /*
3959 * Only for stage where we update data pointers the qgroup fix is
3960 * valid.
3961 * For MOVING_DATA stage, we will miss the timing of swapping tree
3962 * blocks, and won't fix it.
3963 */
3964 if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
3965 return 0;
3966
3967 path = btrfs_alloc_path();
3968 if (!path)
3969 return -ENOMEM;
3970 key.objectid = btrfs_ino(inode);
3971 key.type = BTRFS_EXTENT_DATA_KEY;
3972 key.offset = 0;
3973
3974 ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
3975 if (ret < 0)
3976 goto out;
3977
3978 lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
3979 while (1) {
3980 struct btrfs_file_extent_item *fi;
3981
3982 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3983 if (key.objectid > btrfs_ino(inode))
3984 break;
3985 if (key.type != BTRFS_EXTENT_DATA_KEY)
3986 goto next;
3987 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3988 struct btrfs_file_extent_item);
3989 if (btrfs_file_extent_type(path->nodes[0], fi) !=
3990 BTRFS_FILE_EXTENT_REG)
3991 goto next;
3992 ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
3993 btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
3994 btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
3995 GFP_NOFS);
3996 if (ret < 0)
3997 break;
3998next:
3999 ret = btrfs_next_item(data_reloc_root, path);
4000 if (ret < 0)
4001 break;
4002 if (ret > 0) {
4003 ret = 0;
4004 break;
4005 }
4006 }
4007 unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
4008out:
4009 btrfs_free_path(path);
4010 return ret;
4011}
4012
3919static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 4013static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3920{ 4014{
3921 struct rb_root blocks = RB_ROOT; 4015 struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4196,16 @@ restart:
4102 4196
4103 /* get rid of pinned extents */ 4197 /* get rid of pinned extents */
4104 trans = btrfs_join_transaction(rc->extent_root); 4198 trans = btrfs_join_transaction(rc->extent_root);
4105 if (IS_ERR(trans)) 4199 if (IS_ERR(trans)) {
4106 err = PTR_ERR(trans); 4200 err = PTR_ERR(trans);
4107 else 4201 goto out_free;
4108 btrfs_commit_transaction(trans, rc->extent_root); 4202 }
4203 err = qgroup_fix_relocated_data_extents(trans, rc);
4204 if (err < 0) {
4205 btrfs_abort_transaction(trans, err);
4206 goto out_free;
4207 }
4208 btrfs_commit_transaction(trans, rc->extent_root);
4109out_free: 4209out_free:
4110 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4210 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
4111 btrfs_free_path(path); 4211 btrfs_free_path(path);
@@ -4468,10 +4568,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4468 unset_reloc_control(rc); 4568 unset_reloc_control(rc);
4469 4569
4470 trans = btrfs_join_transaction(rc->extent_root); 4570 trans = btrfs_join_transaction(rc->extent_root);
4471 if (IS_ERR(trans)) 4571 if (IS_ERR(trans)) {
4472 err = PTR_ERR(trans); 4572 err = PTR_ERR(trans);
4473 else 4573 goto out_free;
4474 err = btrfs_commit_transaction(trans, rc->extent_root); 4574 }
4575 err = qgroup_fix_relocated_data_extents(trans, rc);
4576 if (err < 0) {
4577 btrfs_abort_transaction(trans, err);
4578 goto out_free;
4579 }
4580 err = btrfs_commit_transaction(trans, rc->extent_root);
4475out_free: 4581out_free:
4476 kfree(rc); 4582 kfree(rc);
4477out: 4583out:
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7fd7e1830cfe..091296062456 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
272 root_key.objectid = key.offset; 272 root_key.objectid = key.offset;
273 key.offset++; 273 key.offset++;
274 274
275 /*
276 * The root might have been inserted already, as before we look
277 * for orphan roots, log replay might have happened, which
278 * triggers a transaction commit and qgroup accounting, which
279 * in turn reads and inserts fs roots while doing backref
280 * walking.
281 */
282 root = btrfs_lookup_fs_root(tree_root->fs_info,
283 root_key.objectid);
284 if (root) {
285 WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
286 &root->state));
287 if (btrfs_root_refs(&root->root_item) == 0)
288 btrfs_add_dead_root(root);
289 continue;
290 }
291
275 root = btrfs_read_fs_root(tree_root, &root_key); 292 root = btrfs_read_fs_root(tree_root, &root_key);
276 err = PTR_ERR_OR_ZERO(root); 293 err = PTR_ERR_OR_ZERO(root);
277 if (err && err != -ENOENT) { 294 if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
310 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); 327 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
311 328
312 err = btrfs_insert_fs_root(root->fs_info, root); 329 err = btrfs_insert_fs_root(root->fs_info, root);
313 /*
314 * The root might have been inserted already, as before we look
315 * for orphan roots, log replay might have happened, which
316 * triggers a transaction commit and qgroup accounting, which
317 * in turn reads and inserts fs roots while doing backref
318 * walking.
319 */
320 if (err == -EEXIST)
321 err = 0;
322 if (err) { 330 if (err) {
331 BUG_ON(err == -EEXIST);
323 btrfs_free_fs_root(root); 332 btrfs_free_fs_root(root);
324 break; 333 break;
325 } 334 }
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd298385c..efe129fe2678 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@ struct pending_dir_move {
231 u64 parent_ino; 231 u64 parent_ino;
232 u64 ino; 232 u64 ino;
233 u64 gen; 233 u64 gen;
234 bool is_orphan;
235 struct list_head update_refs; 234 struct list_head update_refs;
236}; 235};
237 236
@@ -274,6 +273,39 @@ struct name_cache_entry {
274 char name[]; 273 char name[];
275}; 274};
276 275
276static void inconsistent_snapshot_error(struct send_ctx *sctx,
277 enum btrfs_compare_tree_result result,
278 const char *what)
279{
280 const char *result_string;
281
282 switch (result) {
283 case BTRFS_COMPARE_TREE_NEW:
284 result_string = "new";
285 break;
286 case BTRFS_COMPARE_TREE_DELETED:
287 result_string = "deleted";
288 break;
289 case BTRFS_COMPARE_TREE_CHANGED:
290 result_string = "updated";
291 break;
292 case BTRFS_COMPARE_TREE_SAME:
293 ASSERT(0);
294 result_string = "unchanged";
295 break;
296 default:
297 ASSERT(0);
298 result_string = "unexpected";
299 }
300
301 btrfs_err(sctx->send_root->fs_info,
302 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
303 result_string, what, sctx->cmp_key->objectid,
304 sctx->send_root->root_key.objectid,
305 (sctx->parent_root ?
306 sctx->parent_root->root_key.objectid : 0));
307}
308
277static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); 309static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
278 310
279static struct waiting_dir_move * 311static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1861 * was already unlinked/moved, so we can safely assume that we will not 1893 * was already unlinked/moved, so we can safely assume that we will not
1862 * overwrite anything at this point in time. 1894 * overwrite anything at this point in time.
1863 */ 1895 */
1864 if (other_inode > sctx->send_progress) { 1896 if (other_inode > sctx->send_progress ||
1897 is_waiting_for_move(sctx, other_inode)) {
1865 ret = get_inode_info(sctx->parent_root, other_inode, NULL, 1898 ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1866 who_gen, NULL, NULL, NULL, NULL); 1899 who_gen, NULL, NULL, NULL, NULL);
1867 if (ret < 0) 1900 if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
2502 key.type = BTRFS_INODE_ITEM_KEY; 2535 key.type = BTRFS_INODE_ITEM_KEY;
2503 key.offset = 0; 2536 key.offset = 0;
2504 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2537 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2538 if (ret > 0)
2539 ret = -ENOENT;
2505 if (ret < 0) 2540 if (ret < 0)
2506 goto out; 2541 goto out;
2507 2542
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2947 } 2982 }
2948 2983
2949 if (loc.objectid > send_progress) { 2984 if (loc.objectid > send_progress) {
2985 struct orphan_dir_info *odi;
2986
2987 odi = get_orphan_dir_info(sctx, dir);
2988 free_orphan_dir_info(sctx, odi);
2950 ret = 0; 2989 ret = 0;
2951 goto out; 2990 goto out;
2952 } 2991 }
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
3047 pm->parent_ino = parent_ino; 3086 pm->parent_ino = parent_ino;
3048 pm->ino = ino; 3087 pm->ino = ino;
3049 pm->gen = ino_gen; 3088 pm->gen = ino_gen;
3050 pm->is_orphan = is_orphan;
3051 INIT_LIST_HEAD(&pm->list); 3089 INIT_LIST_HEAD(&pm->list);
3052 INIT_LIST_HEAD(&pm->update_refs); 3090 INIT_LIST_HEAD(&pm->update_refs);
3053 RB_CLEAR_NODE(&pm->node); 3091 RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3113 return NULL; 3151 return NULL;
3114} 3152}
3115 3153
3154static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3155 u64 ino, u64 gen, u64 *ancestor_ino)
3156{
3157 int ret = 0;
3158 u64 parent_inode = 0;
3159 u64 parent_gen = 0;
3160 u64 start_ino = ino;
3161
3162 *ancestor_ino = 0;
3163 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3164 fs_path_reset(name);
3165
3166 if (is_waiting_for_rm(sctx, ino))
3167 break;
3168 if (is_waiting_for_move(sctx, ino)) {
3169 if (*ancestor_ino == 0)
3170 *ancestor_ino = ino;
3171 ret = get_first_ref(sctx->parent_root, ino,
3172 &parent_inode, &parent_gen, name);
3173 } else {
3174 ret = __get_cur_name_and_parent(sctx, ino, gen,
3175 &parent_inode,
3176 &parent_gen, name);
3177 if (ret > 0) {
3178 ret = 0;
3179 break;
3180 }
3181 }
3182 if (ret < 0)
3183 break;
3184 if (parent_inode == start_ino) {
3185 ret = 1;
3186 if (*ancestor_ino == 0)
3187 *ancestor_ino = ino;
3188 break;
3189 }
3190 ino = parent_inode;
3191 gen = parent_gen;
3192 }
3193 return ret;
3194}
3195
3116static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3196static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3117{ 3197{
3118 struct fs_path *from_path = NULL; 3198 struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3123 u64 parent_ino, parent_gen; 3203 u64 parent_ino, parent_gen;
3124 struct waiting_dir_move *dm = NULL; 3204 struct waiting_dir_move *dm = NULL;
3125 u64 rmdir_ino = 0; 3205 u64 rmdir_ino = 0;
3206 u64 ancestor;
3207 bool is_orphan;
3126 int ret; 3208 int ret;
3127 3209
3128 name = fs_path_alloc(); 3210 name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3135 dm = get_waiting_dir_move(sctx, pm->ino); 3217 dm = get_waiting_dir_move(sctx, pm->ino);
3136 ASSERT(dm); 3218 ASSERT(dm);
3137 rmdir_ino = dm->rmdir_ino; 3219 rmdir_ino = dm->rmdir_ino;
3220 is_orphan = dm->orphanized;
3138 free_waiting_dir_move(sctx, dm); 3221 free_waiting_dir_move(sctx, dm);
3139 3222
3140 if (pm->is_orphan) { 3223 if (is_orphan) {
3141 ret = gen_unique_name(sctx, pm->ino, 3224 ret = gen_unique_name(sctx, pm->ino,
3142 pm->gen, from_path); 3225 pm->gen, from_path);
3143 } else { 3226 } else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3155 goto out; 3238 goto out;
3156 3239
3157 sctx->send_progress = sctx->cur_ino + 1; 3240 sctx->send_progress = sctx->cur_ino + 1;
3241 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3242 if (ret < 0)
3243 goto out;
3244 if (ret) {
3245 LIST_HEAD(deleted_refs);
3246 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3247 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3248 &pm->update_refs, &deleted_refs,
3249 is_orphan);
3250 if (ret < 0)
3251 goto out;
3252 if (rmdir_ino) {
3253 dm = get_waiting_dir_move(sctx, pm->ino);
3254 ASSERT(dm);
3255 dm->rmdir_ino = rmdir_ino;
3256 }
3257 goto out;
3258 }
3158 fs_path_reset(name); 3259 fs_path_reset(name);
3159 to_path = name; 3260 to_path = name;
3160 name = NULL; 3261 name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3174 /* already deleted */ 3275 /* already deleted */
3175 goto finish; 3276 goto finish;
3176 } 3277 }
3177 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3278 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
3178 if (ret < 0) 3279 if (ret < 0)
3179 goto out; 3280 goto out;
3180 if (!ret) 3281 if (!ret)
@@ -3204,8 +3305,18 @@ finish:
3204 * and old parent(s). 3305 * and old parent(s).
3205 */ 3306 */
3206 list_for_each_entry(cur, &pm->update_refs, list) { 3307 list_for_each_entry(cur, &pm->update_refs, list) {
3207 if (cur->dir == rmdir_ino) 3308 /*
3309 * The parent inode might have been deleted in the send snapshot
3310 */
3311 ret = get_inode_info(sctx->send_root, cur->dir, NULL,
3312 NULL, NULL, NULL, NULL, NULL);
3313 if (ret == -ENOENT) {
3314 ret = 0;
3208 continue; 3315 continue;
3316 }
3317 if (ret < 0)
3318 goto out;
3319
3209 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3320 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3210 if (ret < 0) 3321 if (ret < 0)
3211 goto out; 3322 goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3325 u64 left_gen; 3436 u64 left_gen;
3326 u64 right_gen; 3437 u64 right_gen;
3327 int ret = 0; 3438 int ret = 0;
3439 struct waiting_dir_move *wdm;
3328 3440
3329 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3441 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3330 return 0; 3442 return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3383 goto out; 3495 goto out;
3384 } 3496 }
3385 3497
3386 if (is_waiting_for_move(sctx, di_key.objectid)) { 3498 wdm = get_waiting_dir_move(sctx, di_key.objectid);
3499 if (wdm && !wdm->orphanized) {
3387 ret = add_pending_dir_move(sctx, 3500 ret = add_pending_dir_move(sctx,
3388 sctx->cur_ino, 3501 sctx->cur_ino,
3389 sctx->cur_inode_gen, 3502 sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
3470 ret = is_ancestor(sctx->parent_root, 3583 ret = is_ancestor(sctx->parent_root,
3471 sctx->cur_ino, sctx->cur_inode_gen, 3584 sctx->cur_ino, sctx->cur_inode_gen,
3472 ino, path_before); 3585 ino, path_before);
3473 break; 3586 if (ret)
3587 break;
3474 } 3588 }
3475 3589
3476 fs_path_reset(path_before); 3590 fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3643 goto out; 3757 goto out;
3644 if (ret) { 3758 if (ret) {
3645 struct name_cache_entry *nce; 3759 struct name_cache_entry *nce;
3760 struct waiting_dir_move *wdm;
3646 3761
3647 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3762 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3648 cur->full_path); 3763 cur->full_path);
3649 if (ret < 0) 3764 if (ret < 0)
3650 goto out; 3765 goto out;
3766
3767 /*
3768 * If ow_inode has its rename operation delayed
3769 * make sure that its orphanized name is used in
3770 * the source path when performing its rename
3771 * operation.
3772 */
3773 if (is_waiting_for_move(sctx, ow_inode)) {
3774 wdm = get_waiting_dir_move(sctx,
3775 ow_inode);
3776 ASSERT(wdm);
3777 wdm->orphanized = true;
3778 }
3779
3651 /* 3780 /*
3652 * Make sure we clear our orphanized inode's 3781 * Make sure we clear our orphanized inode's
3653 * name from the name cache. This is because the 3782 * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3663 name_cache_delete(sctx, nce); 3792 name_cache_delete(sctx, nce);
3664 kfree(nce); 3793 kfree(nce);
3665 } 3794 }
3795
3796 /*
3797 * ow_inode might currently be an ancestor of
3798 * cur_ino, therefore compute valid_path (the
3799 * current path of cur_ino) again because it
3800 * might contain the pre-orphanization name of
3801 * ow_inode, which is no longer valid.
3802 */
3803 fs_path_reset(valid_path);
3804 ret = get_cur_path(sctx, sctx->cur_ino,
3805 sctx->cur_inode_gen, valid_path);
3806 if (ret < 0)
3807 goto out;
3666 } else { 3808 } else {
3667 ret = send_unlink(sctx, cur->full_path); 3809 ret = send_unlink(sctx, cur->full_path);
3668 if (ret < 0) 3810 if (ret < 0)
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
5602{ 5744{
5603 int ret = 0; 5745 int ret = 0;
5604 5746
5605 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5747 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5748 inconsistent_snapshot_error(sctx, result, "reference");
5749 return -EIO;
5750 }
5606 5751
5607 if (!sctx->cur_inode_new_gen && 5752 if (!sctx->cur_inode_new_gen &&
5608 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5753 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
5627{ 5772{
5628 int ret = 0; 5773 int ret = 0;
5629 5774
5630 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5775 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5776 inconsistent_snapshot_error(sctx, result, "xattr");
5777 return -EIO;
5778 }
5631 5779
5632 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5780 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5633 if (result == BTRFS_COMPARE_TREE_NEW) 5781 if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
5651{ 5799{
5652 int ret = 0; 5800 int ret = 0;
5653 5801
5654 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5802 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5803 inconsistent_snapshot_error(sctx, result, "extent");
5804 return -EIO;
5805 }
5655 5806
5656 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5807 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5657 if (result != BTRFS_COMPARE_TREE_DELETED) 5808 if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 864ce334f696..4071fe2bd098 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
2241 struct btrfs_trans_handle *trans; 2241 struct btrfs_trans_handle *trans;
2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root; 2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2243 2243
2244 root->fs_info->fs_frozen = 1;
2245 /*
2246 * We don't need a barrier here, we'll wait for any transaction that
2247 * could be in progress on other threads (and do delayed iputs that
2248 * we want to avoid on a frozen filesystem), or do the commit
2249 * ourselves.
2250 */
2244 trans = btrfs_attach_transaction_barrier(root); 2251 trans = btrfs_attach_transaction_barrier(root);
2245 if (IS_ERR(trans)) { 2252 if (IS_ERR(trans)) {
2246 /* no transaction, don't bother */ 2253 /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
2251 return btrfs_commit_transaction(trans, root); 2258 return btrfs_commit_transaction(trans, root);
2252} 2259}
2253 2260
2261static int btrfs_unfreeze(struct super_block *sb)
2262{
2263 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2264
2265 root->fs_info->fs_frozen = 0;
2266 return 0;
2267}
2268
2254static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 2269static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2255{ 2270{
2256 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 2271 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
2299 .statfs = btrfs_statfs, 2314 .statfs = btrfs_statfs,
2300 .remount_fs = btrfs_remount, 2315 .remount_fs = btrfs_remount,
2301 .freeze_fs = btrfs_freeze, 2316 .freeze_fs = btrfs_freeze,
2317 .unfreeze_fs = btrfs_unfreeze,
2302}; 2318};
2303 2319
2304static const struct file_operations btrfs_ctl_fops = { 2320static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9cca0a721961..95d41919d034 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2278 2278
2279 kmem_cache_free(btrfs_trans_handle_cachep, trans); 2279 kmem_cache_free(btrfs_trans_handle_cachep, trans);
2280 2280
2281 /*
2282 * If fs has been frozen, we can not handle delayed iputs, otherwise
2283 * it'll result in deadlock about SB_FREEZE_FS.
2284 */
2281 if (current != root->fs_info->transaction_kthread && 2285 if (current != root->fs_info->transaction_kthread &&
2282 current != root->fs_info->cleaner_kthread) 2286 current != root->fs_info->cleaner_kthread &&
2287 !root->fs_info->fs_frozen)
2283 btrfs_run_delayed_iputs(root); 2288 btrfs_run_delayed_iputs(root);
2284 2289
2285 return ret; 2290 return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d31a0c4f56be..e935035ac034 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
27#include "backref.h" 27#include "backref.h"
28#include "hash.h" 28#include "hash.h"
29#include "compression.h" 29#include "compression.h"
30#include "qgroup.h"
30 31
31/* magic values for the inode_only field in btrfs_log_inode: 32/* magic values for the inode_only field in btrfs_log_inode:
32 * 33 *
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
680 ins.type = BTRFS_EXTENT_ITEM_KEY; 681 ins.type = BTRFS_EXTENT_ITEM_KEY;
681 offset = key->offset - btrfs_file_extent_offset(eb, item); 682 offset = key->offset - btrfs_file_extent_offset(eb, item);
682 683
684 /*
685 * Manually record dirty extent, as here we did a shallow
686 * file extent item copy and skip normal backref update,
687 * but modifying extent tree all by ourselves.
688 * So need to manually record dirty extent for qgroup,
689 * as the owner of the file extent changed from log tree
690 * (doesn't affect qgroup) to fs/file tree(affects qgroup)
691 */
692 ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
693 btrfs_file_extent_disk_bytenr(eb, item),
694 btrfs_file_extent_disk_num_bytes(eb, item),
695 GFP_NOFS);
696 if (ret < 0)
697 goto out;
698
683 if (ins.objectid > 0) { 699 if (ins.objectid > 0) {
684 u64 csum_start; 700 u64 csum_start;
685 u64 csum_end; 701 u64 csum_end;
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2807 */ 2823 */
2808 mutex_unlock(&root->log_mutex); 2824 mutex_unlock(&root->log_mutex);
2809 2825
2810 btrfs_init_log_ctx(&root_log_ctx); 2826 btrfs_init_log_ctx(&root_log_ctx, NULL);
2811 2827
2812 mutex_lock(&log_root_tree->log_mutex); 2828 mutex_lock(&log_root_tree->log_mutex);
2813 atomic_inc(&log_root_tree->log_batch); 2829 atomic_inc(&log_root_tree->log_batch);
@@ -4469,7 +4485,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
4469static int btrfs_check_ref_name_override(struct extent_buffer *eb, 4485static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4470 const int slot, 4486 const int slot,
4471 const struct btrfs_key *key, 4487 const struct btrfs_key *key,
4472 struct inode *inode) 4488 struct inode *inode,
4489 u64 *other_ino)
4473{ 4490{
4474 int ret; 4491 int ret;
4475 struct btrfs_path *search_path; 4492 struct btrfs_path *search_path;
@@ -4528,7 +4545,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4528 search_path, parent, 4545 search_path, parent,
4529 name, this_name_len, 0); 4546 name, this_name_len, 0);
4530 if (di && !IS_ERR(di)) { 4547 if (di && !IS_ERR(di)) {
4531 ret = 1; 4548 struct btrfs_key di_key;
4549
4550 btrfs_dir_item_key_to_cpu(search_path->nodes[0],
4551 di, &di_key);
4552 if (di_key.type == BTRFS_INODE_ITEM_KEY) {
4553 ret = 1;
4554 *other_ino = di_key.objectid;
4555 } else {
4556 ret = -EAGAIN;
4557 }
4532 goto out; 4558 goto out;
4533 } else if (IS_ERR(di)) { 4559 } else if (IS_ERR(di)) {
4534 ret = PTR_ERR(di); 4560 ret = PTR_ERR(di);
@@ -4722,16 +4748,72 @@ again:
4722 if ((min_key.type == BTRFS_INODE_REF_KEY || 4748 if ((min_key.type == BTRFS_INODE_REF_KEY ||
4723 min_key.type == BTRFS_INODE_EXTREF_KEY) && 4749 min_key.type == BTRFS_INODE_EXTREF_KEY) &&
4724 BTRFS_I(inode)->generation == trans->transid) { 4750 BTRFS_I(inode)->generation == trans->transid) {
4751 u64 other_ino = 0;
4752
4725 ret = btrfs_check_ref_name_override(path->nodes[0], 4753 ret = btrfs_check_ref_name_override(path->nodes[0],
4726 path->slots[0], 4754 path->slots[0],
4727 &min_key, inode); 4755 &min_key, inode,
4756 &other_ino);
4728 if (ret < 0) { 4757 if (ret < 0) {
4729 err = ret; 4758 err = ret;
4730 goto out_unlock; 4759 goto out_unlock;
4731 } else if (ret > 0) { 4760 } else if (ret > 0 && ctx &&
4732 err = 1; 4761 other_ino != btrfs_ino(ctx->inode)) {
4733 btrfs_set_log_full_commit(root->fs_info, trans); 4762 struct btrfs_key inode_key;
4734 goto out_unlock; 4763 struct inode *other_inode;
4764
4765 if (ins_nr > 0) {
4766 ins_nr++;
4767 } else {
4768 ins_nr = 1;
4769 ins_start_slot = path->slots[0];
4770 }
4771 ret = copy_items(trans, inode, dst_path, path,
4772 &last_extent, ins_start_slot,
4773 ins_nr, inode_only,
4774 logged_isize);
4775 if (ret < 0) {
4776 err = ret;
4777 goto out_unlock;
4778 }
4779 ins_nr = 0;
4780 btrfs_release_path(path);
4781 inode_key.objectid = other_ino;
4782 inode_key.type = BTRFS_INODE_ITEM_KEY;
4783 inode_key.offset = 0;
4784 other_inode = btrfs_iget(root->fs_info->sb,
4785 &inode_key, root,
4786 NULL);
4787 /*
4788 * If the other inode that had a conflicting dir
4789 * entry was deleted in the current transaction,
4790 * we don't need to do more work nor fallback to
4791 * a transaction commit.
4792 */
4793 if (IS_ERR(other_inode) &&
4794 PTR_ERR(other_inode) == -ENOENT) {
4795 goto next_key;
4796 } else if (IS_ERR(other_inode)) {
4797 err = PTR_ERR(other_inode);
4798 goto out_unlock;
4799 }
4800 /*
4801 * We are safe logging the other inode without
4802 * acquiring its i_mutex as long as we log with
4803 * the LOG_INODE_EXISTS mode. We're safe against
4804 * concurrent renames of the other inode as well
4805 * because during a rename we pin the log and
4806 * update the log with the new name before we
4807 * unpin it.
4808 */
4809 err = btrfs_log_inode(trans, root, other_inode,
4810 LOG_INODE_EXISTS,
4811 0, LLONG_MAX, ctx);
4812 iput(other_inode);
4813 if (err)
4814 goto out_unlock;
4815 else
4816 goto next_key;
4735 } 4817 }
4736 } 4818 }
4737 4819
@@ -4799,7 +4881,7 @@ next_slot:
4799 ins_nr = 0; 4881 ins_nr = 0;
4800 } 4882 }
4801 btrfs_release_path(path); 4883 btrfs_release_path(path);
4802 4884next_key:
4803 if (min_key.offset < (u64)-1) { 4885 if (min_key.offset < (u64)-1) {
4804 min_key.offset++; 4886 min_key.offset++;
4805 } else if (min_key.type < max_key.type) { 4887 } else if (min_key.type < max_key.type) {
@@ -4993,8 +5075,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4993 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 5075 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
4994 break; 5076 break;
4995 5077
4996 if (IS_ROOT(parent)) 5078 if (IS_ROOT(parent)) {
5079 inode = d_inode(parent);
5080 if (btrfs_must_commit_transaction(trans, inode))
5081 ret = 1;
4997 break; 5082 break;
5083 }
4998 5084
4999 parent = dget_parent(parent); 5085 parent = dget_parent(parent);
5000 dput(old_parent); 5086 dput(old_parent);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75d080d..ab858e31ccbc 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
30 int log_transid; 30 int log_transid;
31 int io_err; 31 int io_err;
32 bool log_new_dentries; 32 bool log_new_dentries;
33 struct inode *inode;
33 struct list_head list; 34 struct list_head list;
34}; 35};
35 36
36static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) 37static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
38 struct inode *inode)
37{ 39{
38 ctx->log_ret = 0; 40 ctx->log_ret = 0;
39 ctx->log_transid = 0; 41 ctx->log_transid = 0;
40 ctx->io_err = 0; 42 ctx->io_err = 0;
41 ctx->log_new_dentries = false; 43 ctx->log_new_dentries = false;
44 ctx->inode = inode;
42 INIT_LIST_HEAD(&ctx->list); 45 INIT_LIST_HEAD(&ctx->list);
43} 46}
44 47
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 51f125508771..035efce603a9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work)
834 struct btrfs_device *device; 834 struct btrfs_device *device;
835 835
836 device = container_of(work, struct btrfs_device, rcu_work); 836 device = container_of(work, struct btrfs_device, rcu_work);
837
838 if (device->bdev)
839 blkdev_put(device->bdev, device->mode);
840
841 rcu_string_free(device->name); 837 rcu_string_free(device->name);
842 kfree(device); 838 kfree(device);
843} 839}
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head)
852 schedule_work(&device->rcu_work); 848 schedule_work(&device->rcu_work);
853} 849}
854 850
851static void btrfs_close_bdev(struct btrfs_device *device)
852{
853 if (device->bdev && device->writeable) {
854 sync_blockdev(device->bdev);
855 invalidate_bdev(device->bdev);
856 }
857
858 if (device->bdev)
859 blkdev_put(device->bdev, device->mode);
860}
861
855static void btrfs_close_one_device(struct btrfs_device *device) 862static void btrfs_close_one_device(struct btrfs_device *device)
856{ 863{
857 struct btrfs_fs_devices *fs_devices = device->fs_devices; 864 struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
870 if (device->missing) 877 if (device->missing)
871 fs_devices->missing_devices--; 878 fs_devices->missing_devices--;
872 879
873 if (device->bdev && device->writeable) { 880 btrfs_close_bdev(device);
874 sync_blockdev(device->bdev);
875 invalidate_bdev(device->bdev);
876 }
877 881
878 new_device = btrfs_alloc_device(NULL, &device->devid, 882 new_device = btrfs_alloc_device(NULL, &device->devid,
879 device->uuid); 883 device->uuid);
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
1932 btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); 1936 btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
1933 } 1937 }
1934 1938
1939 btrfs_close_bdev(device);
1940
1935 call_rcu(&device->rcu, free_device); 1941 call_rcu(&device->rcu, free_device);
1936 1942
1937 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; 1943 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2025 /* zero out the old super if it is writable */ 2031 /* zero out the old super if it is writable */
2026 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); 2032 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
2027 } 2033 }
2034
2035 btrfs_close_bdev(srcdev);
2036
2028 call_rcu(&srcdev->rcu, free_device); 2037 call_rcu(&srcdev->rcu, free_device);
2029 2038
2030 /* 2039 /*
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2080 * the device_list_mutex lock. 2089 * the device_list_mutex lock.
2081 */ 2090 */
2082 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); 2091 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
2092
2093 btrfs_close_bdev(tgtdev);
2083 call_rcu(&tgtdev->rcu, free_device); 2094 call_rcu(&tgtdev->rcu, free_device);
2084} 2095}
2085 2096
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 99115cae1652..16e6ded0b7f2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1347,9 +1347,12 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
1347{ 1347{
1348 struct inode *inode = &ci->vfs_inode; 1348 struct inode *inode = &ci->vfs_inode;
1349 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1349 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1350 struct ceph_mds_session *session = *psession; 1350 struct ceph_mds_session *session = NULL;
1351 int mds; 1351 int mds;
1352
1352 dout("ceph_flush_snaps %p\n", inode); 1353 dout("ceph_flush_snaps %p\n", inode);
1354 if (psession)
1355 session = *psession;
1353retry: 1356retry:
1354 spin_lock(&ci->i_ceph_lock); 1357 spin_lock(&ci->i_ceph_lock);
1355 if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { 1358 if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa59a85226b2..f72d4ae303b2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2759,6 +2759,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2759 } else { 2759 } else {
2760 path = NULL; 2760 path = NULL;
2761 pathlen = 0; 2761 pathlen = 0;
2762 pathbase = 0;
2762 } 2763 }
2763 2764
2764 spin_lock(&ci->i_ceph_lock); 2765 spin_lock(&ci->i_ceph_lock);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea64912c9c0..466f7d60edc2 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops;
607static const struct file_operations format3_fops; 607static const struct file_operations format3_fops;
608static const struct file_operations format4_fops; 608static const struct file_operations format4_fops;
609 609
610static int table_open(struct inode *inode, struct file *file) 610static int table_open1(struct inode *inode, struct file *file)
611{ 611{
612 struct seq_file *seq; 612 struct seq_file *seq;
613 int ret = -1; 613 int ret;
614 614
615 if (file->f_op == &format1_fops) 615 ret = seq_open(file, &format1_seq_ops);
616 ret = seq_open(file, &format1_seq_ops); 616 if (ret)
617 else if (file->f_op == &format2_fops) 617 return ret;
618 ret = seq_open(file, &format2_seq_ops); 618
619 else if (file->f_op == &format3_fops) 619 seq = file->private_data;
620 ret = seq_open(file, &format3_seq_ops); 620 seq->private = inode->i_private; /* the dlm_ls */
621 else if (file->f_op == &format4_fops) 621 return 0;
622 ret = seq_open(file, &format4_seq_ops); 622}
623
624static int table_open2(struct inode *inode, struct file *file)
625{
626 struct seq_file *seq;
627 int ret;
628
629 ret = seq_open(file, &format2_seq_ops);
630 if (ret)
631 return ret;
632
633 seq = file->private_data;
634 seq->private = inode->i_private; /* the dlm_ls */
635 return 0;
636}
637
638static int table_open3(struct inode *inode, struct file *file)
639{
640 struct seq_file *seq;
641 int ret;
642
643 ret = seq_open(file, &format3_seq_ops);
644 if (ret)
645 return ret;
646
647 seq = file->private_data;
648 seq->private = inode->i_private; /* the dlm_ls */
649 return 0;
650}
651
652static int table_open4(struct inode *inode, struct file *file)
653{
654 struct seq_file *seq;
655 int ret;
623 656
657 ret = seq_open(file, &format4_seq_ops);
624 if (ret) 658 if (ret)
625 return ret; 659 return ret;
626 660
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file)
631 665
632static const struct file_operations format1_fops = { 666static const struct file_operations format1_fops = {
633 .owner = THIS_MODULE, 667 .owner = THIS_MODULE,
634 .open = table_open, 668 .open = table_open1,
635 .read = seq_read, 669 .read = seq_read,
636 .llseek = seq_lseek, 670 .llseek = seq_lseek,
637 .release = seq_release 671 .release = seq_release
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = {
639 673
640static const struct file_operations format2_fops = { 674static const struct file_operations format2_fops = {
641 .owner = THIS_MODULE, 675 .owner = THIS_MODULE,
642 .open = table_open, 676 .open = table_open2,
643 .read = seq_read, 677 .read = seq_read,
644 .llseek = seq_lseek, 678 .llseek = seq_lseek,
645 .release = seq_release 679 .release = seq_release
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = {
647 681
648static const struct file_operations format3_fops = { 682static const struct file_operations format3_fops = {
649 .owner = THIS_MODULE, 683 .owner = THIS_MODULE,
650 .open = table_open, 684 .open = table_open3,
651 .read = seq_read, 685 .read = seq_read,
652 .llseek = seq_lseek, 686 .llseek = seq_lseek,
653 .release = seq_release 687 .release = seq_release
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = {
655 689
656static const struct file_operations format4_fops = { 690static const struct file_operations format4_fops = {
657 .owner = THIS_MODULE, 691 .owner = THIS_MODULE,
658 .open = table_open, 692 .open = table_open4,
659 .read = seq_read, 693 .read = seq_read,
660 .llseek = seq_lseek, 694 .llseek = seq_lseek,
661 .release = seq_release 695 .release = seq_release
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d64d2a515cb2..ccb401eebc11 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file,
1699 trace_f2fs_write_end(inode, pos, len, copied); 1699 trace_f2fs_write_end(inode, pos, len, copied);
1700 1700
1701 set_page_dirty(page); 1701 set_page_dirty(page);
1702 f2fs_put_page(page, 1);
1703 1702
1704 if (pos + copied > i_size_read(inode)) 1703 if (pos + copied > i_size_read(inode))
1705 f2fs_i_size_write(inode, pos + copied); 1704 f2fs_i_size_write(inode, pos + copied);
1706 1705
1706 f2fs_put_page(page, 1);
1707 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1707 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1708 return copied; 1708 return copied;
1709} 1709}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 675fa79d86f6..14f5fe2b841e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -538,7 +538,7 @@ struct f2fs_nm_info {
538 /* NAT cache management */ 538 /* NAT cache management */
539 struct radix_tree_root nat_root;/* root of the nat entry cache */ 539 struct radix_tree_root nat_root;/* root of the nat entry cache */
540 struct radix_tree_root nat_set_root;/* root of the nat set cache */ 540 struct radix_tree_root nat_set_root;/* root of the nat set cache */
541 struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ 541 struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
542 struct list_head nat_entries; /* cached nat entry list (clean) */ 542 struct list_head nat_entries; /* cached nat entry list (clean) */
543 unsigned int nat_cnt; /* the # of cached nat entries */ 543 unsigned int nat_cnt; /* the # of cached nat entries */
544 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 544 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
@@ -787,7 +787,7 @@ struct f2fs_sb_info {
787 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 787 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
788 struct inode *meta_inode; /* cache meta blocks */ 788 struct inode *meta_inode; /* cache meta blocks */
789 struct mutex cp_mutex; /* checkpoint procedure lock */ 789 struct mutex cp_mutex; /* checkpoint procedure lock */
790 struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ 790 struct rw_semaphore cp_rwsem; /* blocking FS operations */
791 struct rw_semaphore node_write; /* locking node writes */ 791 struct rw_semaphore node_write; /* locking node writes */
792 wait_queue_head_t cp_wait; 792 wait_queue_head_t cp_wait;
793 unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ 793 unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
@@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
1074 1074
1075static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) 1075static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
1076{ 1076{
1077 percpu_down_read(&sbi->cp_rwsem); 1077 down_read(&sbi->cp_rwsem);
1078} 1078}
1079 1079
1080static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) 1080static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
1081{ 1081{
1082 percpu_up_read(&sbi->cp_rwsem); 1082 up_read(&sbi->cp_rwsem);
1083} 1083}
1084 1084
1085static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) 1085static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
1086{ 1086{
1087 percpu_down_write(&sbi->cp_rwsem); 1087 down_write(&sbi->cp_rwsem);
1088} 1088}
1089 1089
1090static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) 1090static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
1091{ 1091{
1092 percpu_up_write(&sbi->cp_rwsem); 1092 up_write(&sbi->cp_rwsem);
1093} 1093}
1094 1094
1095static inline int __get_cp_reason(struct f2fs_sb_info *sbi) 1095static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e493f63ea41..47abb96098e4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2086,15 +2086,19 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2086 if (unlikely(f2fs_readonly(src->i_sb))) 2086 if (unlikely(f2fs_readonly(src->i_sb)))
2087 return -EROFS; 2087 return -EROFS;
2088 2088
2089 if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) 2089 if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
2090 return -EISDIR; 2090 return -EINVAL;
2091 2091
2092 if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) 2092 if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
2093 return -EOPNOTSUPP; 2093 return -EOPNOTSUPP;
2094 2094
2095 inode_lock(src); 2095 inode_lock(src);
2096 if (src != dst) 2096 if (src != dst) {
2097 inode_lock(dst); 2097 if (!inode_trylock(dst)) {
2098 ret = -EBUSY;
2099 goto out;
2100 }
2101 }
2098 2102
2099 ret = -EINVAL; 2103 ret = -EINVAL;
2100 if (pos_in + len > src->i_size || pos_in + len < pos_in) 2104 if (pos_in + len > src->i_size || pos_in + len < pos_in)
@@ -2152,6 +2156,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2152out_unlock: 2156out_unlock:
2153 if (src != dst) 2157 if (src != dst)
2154 inode_unlock(dst); 2158 inode_unlock(dst);
2159out:
2155 inode_unlock(src); 2160 inode_unlock(src);
2156 return ret; 2161 return ret;
2157} 2162}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b2fa4b615925..f75d197d5beb 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
206 struct nat_entry *e; 206 struct nat_entry *e;
207 bool need = false; 207 bool need = false;
208 208
209 percpu_down_read(&nm_i->nat_tree_lock); 209 down_read(&nm_i->nat_tree_lock);
210 e = __lookup_nat_cache(nm_i, nid); 210 e = __lookup_nat_cache(nm_i, nid);
211 if (e) { 211 if (e) {
212 if (!get_nat_flag(e, IS_CHECKPOINTED) && 212 if (!get_nat_flag(e, IS_CHECKPOINTED) &&
213 !get_nat_flag(e, HAS_FSYNCED_INODE)) 213 !get_nat_flag(e, HAS_FSYNCED_INODE))
214 need = true; 214 need = true;
215 } 215 }
216 percpu_up_read(&nm_i->nat_tree_lock); 216 up_read(&nm_i->nat_tree_lock);
217 return need; 217 return need;
218} 218}
219 219
@@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
223 struct nat_entry *e; 223 struct nat_entry *e;
224 bool is_cp = true; 224 bool is_cp = true;
225 225
226 percpu_down_read(&nm_i->nat_tree_lock); 226 down_read(&nm_i->nat_tree_lock);
227 e = __lookup_nat_cache(nm_i, nid); 227 e = __lookup_nat_cache(nm_i, nid);
228 if (e && !get_nat_flag(e, IS_CHECKPOINTED)) 228 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
229 is_cp = false; 229 is_cp = false;
230 percpu_up_read(&nm_i->nat_tree_lock); 230 up_read(&nm_i->nat_tree_lock);
231 return is_cp; 231 return is_cp;
232} 232}
233 233
@@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
237 struct nat_entry *e; 237 struct nat_entry *e;
238 bool need_update = true; 238 bool need_update = true;
239 239
240 percpu_down_read(&nm_i->nat_tree_lock); 240 down_read(&nm_i->nat_tree_lock);
241 e = __lookup_nat_cache(nm_i, ino); 241 e = __lookup_nat_cache(nm_i, ino);
242 if (e && get_nat_flag(e, HAS_LAST_FSYNC) && 242 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
243 (get_nat_flag(e, IS_CHECKPOINTED) || 243 (get_nat_flag(e, IS_CHECKPOINTED) ||
244 get_nat_flag(e, HAS_FSYNCED_INODE))) 244 get_nat_flag(e, HAS_FSYNCED_INODE)))
245 need_update = false; 245 need_update = false;
246 percpu_up_read(&nm_i->nat_tree_lock); 246 up_read(&nm_i->nat_tree_lock);
247 return need_update; 247 return need_update;
248} 248}
249 249
@@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
284 struct f2fs_nm_info *nm_i = NM_I(sbi); 284 struct f2fs_nm_info *nm_i = NM_I(sbi);
285 struct nat_entry *e; 285 struct nat_entry *e;
286 286
287 percpu_down_write(&nm_i->nat_tree_lock); 287 down_write(&nm_i->nat_tree_lock);
288 e = __lookup_nat_cache(nm_i, ni->nid); 288 e = __lookup_nat_cache(nm_i, ni->nid);
289 if (!e) { 289 if (!e) {
290 e = grab_nat_entry(nm_i, ni->nid); 290 e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
334 set_nat_flag(e, HAS_FSYNCED_INODE, true); 334 set_nat_flag(e, HAS_FSYNCED_INODE, true);
335 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); 335 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
336 } 336 }
337 percpu_up_write(&nm_i->nat_tree_lock); 337 up_write(&nm_i->nat_tree_lock);
338} 338}
339 339
340int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 340int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
342 struct f2fs_nm_info *nm_i = NM_I(sbi); 342 struct f2fs_nm_info *nm_i = NM_I(sbi);
343 int nr = nr_shrink; 343 int nr = nr_shrink;
344 344
345 percpu_down_write(&nm_i->nat_tree_lock); 345 if (!down_write_trylock(&nm_i->nat_tree_lock))
346 return 0;
346 347
347 while (nr_shrink && !list_empty(&nm_i->nat_entries)) { 348 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
348 struct nat_entry *ne; 349 struct nat_entry *ne;
@@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
351 __del_from_nat_cache(nm_i, ne); 352 __del_from_nat_cache(nm_i, ne);
352 nr_shrink--; 353 nr_shrink--;
353 } 354 }
354 percpu_up_write(&nm_i->nat_tree_lock); 355 up_write(&nm_i->nat_tree_lock);
355 return nr - nr_shrink; 356 return nr - nr_shrink;
356} 357}
357 358
@@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
373 ni->nid = nid; 374 ni->nid = nid;
374 375
375 /* Check nat cache */ 376 /* Check nat cache */
376 percpu_down_read(&nm_i->nat_tree_lock); 377 down_read(&nm_i->nat_tree_lock);
377 e = __lookup_nat_cache(nm_i, nid); 378 e = __lookup_nat_cache(nm_i, nid);
378 if (e) { 379 if (e) {
379 ni->ino = nat_get_ino(e); 380 ni->ino = nat_get_ino(e);
380 ni->blk_addr = nat_get_blkaddr(e); 381 ni->blk_addr = nat_get_blkaddr(e);
381 ni->version = nat_get_version(e); 382 ni->version = nat_get_version(e);
382 percpu_up_read(&nm_i->nat_tree_lock); 383 up_read(&nm_i->nat_tree_lock);
383 return; 384 return;
384 } 385 }
385 386
@@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
403 node_info_from_raw_nat(ni, &ne); 404 node_info_from_raw_nat(ni, &ne);
404 f2fs_put_page(page, 1); 405 f2fs_put_page(page, 1);
405cache: 406cache:
406 percpu_up_read(&nm_i->nat_tree_lock); 407 up_read(&nm_i->nat_tree_lock);
407 /* cache nat entry */ 408 /* cache nat entry */
408 percpu_down_write(&nm_i->nat_tree_lock); 409 down_write(&nm_i->nat_tree_lock);
409 cache_nat_entry(sbi, nid, &ne); 410 cache_nat_entry(sbi, nid, &ne);
410 percpu_up_write(&nm_i->nat_tree_lock); 411 up_write(&nm_i->nat_tree_lock);
411} 412}
412 413
413/* 414/*
@@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
1788 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 1789 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
1789 META_NAT, true); 1790 META_NAT, true);
1790 1791
1791 percpu_down_read(&nm_i->nat_tree_lock); 1792 down_read(&nm_i->nat_tree_lock);
1792 1793
1793 while (1) { 1794 while (1) {
1794 struct page *page = get_current_nat_page(sbi, nid); 1795 struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
1820 remove_free_nid(nm_i, nid); 1821 remove_free_nid(nm_i, nid);
1821 } 1822 }
1822 up_read(&curseg->journal_rwsem); 1823 up_read(&curseg->journal_rwsem);
1823 percpu_up_read(&nm_i->nat_tree_lock); 1824 up_read(&nm_i->nat_tree_lock);
1824 1825
1825 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 1826 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
1826 nm_i->ra_nid_pages, META_NAT, false); 1827 nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2209 if (!nm_i->dirty_nat_cnt) 2210 if (!nm_i->dirty_nat_cnt)
2210 return; 2211 return;
2211 2212
2212 percpu_down_write(&nm_i->nat_tree_lock); 2213 down_write(&nm_i->nat_tree_lock);
2213 2214
2214 /* 2215 /*
2215 * if there are no enough space in journal to store dirty nat 2216 * if there are no enough space in journal to store dirty nat
@@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2232 list_for_each_entry_safe(set, tmp, &sets, set_list) 2233 list_for_each_entry_safe(set, tmp, &sets, set_list)
2233 __flush_nat_entry_set(sbi, set); 2234 __flush_nat_entry_set(sbi, set);
2234 2235
2235 percpu_up_write(&nm_i->nat_tree_lock); 2236 up_write(&nm_i->nat_tree_lock);
2236 2237
2237 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2238 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
2238} 2239}
@@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2268 2269
2269 mutex_init(&nm_i->build_lock); 2270 mutex_init(&nm_i->build_lock);
2270 spin_lock_init(&nm_i->free_nid_list_lock); 2271 spin_lock_init(&nm_i->free_nid_list_lock);
2271 if (percpu_init_rwsem(&nm_i->nat_tree_lock)) 2272 init_rwsem(&nm_i->nat_tree_lock);
2272 return -ENOMEM;
2273 2273
2274 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 2274 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
2275 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 2275 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2326 spin_unlock(&nm_i->free_nid_list_lock); 2326 spin_unlock(&nm_i->free_nid_list_lock);
2327 2327
2328 /* destroy nat cache */ 2328 /* destroy nat cache */
2329 percpu_down_write(&nm_i->nat_tree_lock); 2329 down_write(&nm_i->nat_tree_lock);
2330 while ((found = __gang_lookup_nat_cache(nm_i, 2330 while ((found = __gang_lookup_nat_cache(nm_i,
2331 nid, NATVEC_SIZE, natvec))) { 2331 nid, NATVEC_SIZE, natvec))) {
2332 unsigned idx; 2332 unsigned idx;
@@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2351 kmem_cache_free(nat_entry_set_slab, setvec[idx]); 2351 kmem_cache_free(nat_entry_set_slab, setvec[idx]);
2352 } 2352 }
2353 } 2353 }
2354 percpu_up_write(&nm_i->nat_tree_lock); 2354 up_write(&nm_i->nat_tree_lock);
2355 2355
2356 percpu_free_rwsem(&nm_i->nat_tree_lock);
2357 kfree(nm_i->nat_bitmap); 2356 kfree(nm_i->nat_bitmap);
2358 sbi->nm_info = NULL; 2357 sbi->nm_info = NULL;
2359 kfree(nm_i); 2358 kfree(nm_i);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1b86d3f638ef..7f863a645ab1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
706 percpu_counter_destroy(&sbi->nr_pages[i]); 706 percpu_counter_destroy(&sbi->nr_pages[i]);
707 percpu_counter_destroy(&sbi->alloc_valid_block_count); 707 percpu_counter_destroy(&sbi->alloc_valid_block_count);
708 percpu_counter_destroy(&sbi->total_valid_inode_count); 708 percpu_counter_destroy(&sbi->total_valid_inode_count);
709
710 percpu_free_rwsem(&sbi->cp_rwsem);
711} 709}
712 710
713static void f2fs_put_super(struct super_block *sb) 711static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
1483{ 1481{
1484 int i, err; 1482 int i, err;
1485 1483
1486 if (percpu_init_rwsem(&sbi->cp_rwsem))
1487 return -ENOMEM;
1488
1489 for (i = 0; i < NR_COUNT_TYPE; i++) { 1484 for (i = 0; i < NR_COUNT_TYPE; i++) {
1490 err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); 1485 err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
1491 if (err) 1486 if (err)
@@ -1686,6 +1681,7 @@ try_onemore:
1686 sbi->write_io[i].bio = NULL; 1681 sbi->write_io[i].bio = NULL;
1687 } 1682 }
1688 1683
1684 init_rwsem(&sbi->cp_rwsem);
1689 init_waitqueue_head(&sbi->cp_wait); 1685 init_waitqueue_head(&sbi->cp_wait);
1690 init_sb_info(sbi); 1686 init_sb_info(sbi);
1691 1687
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4d09d4441e3e..05713a5da083 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1949,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1949{ 1949{
1950 struct backing_dev_info *bdi; 1950 struct backing_dev_info *bdi;
1951 1951
1952 /*
1953 * If we are expecting writeback progress we must submit plugged IO.
1954 */
1955 if (blk_needs_flush_plug(current))
1956 blk_schedule_flush_plug(current);
1957
1952 if (!nr_pages) 1958 if (!nr_pages)
1953 nr_pages = get_nr_dirty_pages(); 1959 nr_pages = get_nr_dirty_pages();
1954 1960
diff --git a/fs/iomap.c b/fs/iomap.c
index 48141b8eff5f..0342254646e3 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
84 * Now the data has been copied, commit the range we've copied. This 84 * Now the data has been copied, commit the range we've copied. This
85 * should not fail unless the filesystem has had a fatal error. 85 * should not fail unless the filesystem has had a fatal error.
86 */ 86 */
87 ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0, 87 if (ops->iomap_end) {
88 flags, &iomap); 88 ret = ops->iomap_end(inode, pos, length,
89 written > 0 ? written : 0,
90 flags, &iomap);
91 }
89 92
90 return written ? written : ret; 93 return written ? written : ret;
91} 94}
@@ -194,12 +197,9 @@ again:
194 if (mapping_writably_mapped(inode->i_mapping)) 197 if (mapping_writably_mapped(inode->i_mapping))
195 flush_dcache_page(page); 198 flush_dcache_page(page);
196 199
197 pagefault_disable();
198 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); 200 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
199 pagefault_enable();
200 201
201 flush_dcache_page(page); 202 flush_dcache_page(page);
202 mark_page_accessed(page);
203 203
204 status = iomap_write_end(inode, pos, bytes, copied, page); 204 status = iomap_write_end(inode, pos, bytes, copied, page);
205 if (unlikely(status < 0)) 205 if (unlikely(status < 0))
@@ -470,13 +470,18 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
470 if (ret) 470 if (ret)
471 return ret; 471 return ret;
472 472
473 ret = filemap_write_and_wait(inode->i_mapping); 473 if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
474 if (ret) 474 ret = filemap_write_and_wait(inode->i_mapping);
475 return ret; 475 if (ret)
476 return ret;
477 }
476 478
477 while (len > 0) { 479 while (len > 0) {
478 ret = iomap_apply(inode, start, len, 0, ops, &ctx, 480 ret = iomap_apply(inode, start, len, 0, ops, &ctx,
479 iomap_fiemap_actor); 481 iomap_fiemap_actor);
482 /* inode with no (attribute) mapping will give ENOENT */
483 if (ret == -ENOENT)
484 break;
480 if (ret < 0) 485 if (ret < 0)
481 return ret; 486 return ret;
482 if (ret == 0) 487 if (ret == 0)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 33da841a21bb..6f4752734804 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -338,6 +338,8 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
338 case 0: 338 case 0:
339 break; 339 break;
340 case -NFS4ERR_EXPIRED: 340 case -NFS4ERR_EXPIRED:
341 case -NFS4ERR_ADMIN_REVOKED:
342 case -NFS4ERR_DELEG_REVOKED:
341 case -NFS4ERR_STALE_STATEID: 343 case -NFS4ERR_STALE_STATEID:
342 case -NFS4ERR_OLD_STATEID: 344 case -NFS4ERR_OLD_STATEID:
343 case -NFS4ERR_BAD_STATEID: 345 case -NFS4ERR_BAD_STATEID:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 324bfdc21250..9bf64eacba5b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -396,6 +396,10 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
396extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); 396extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
397extern void nfs4_kill_renewd(struct nfs_client *); 397extern void nfs4_kill_renewd(struct nfs_client *);
398extern void nfs4_renew_state(struct work_struct *); 398extern void nfs4_renew_state(struct work_struct *);
399extern void nfs4_set_lease_period(struct nfs_client *clp,
400 unsigned long lease,
401 unsigned long lastrenewed);
402
399 403
400/* nfs4state.c */ 404/* nfs4state.c */
401struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); 405struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a036e93bdf96..1949bbd806eb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4237,12 +4237,9 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
4237 err = _nfs4_do_fsinfo(server, fhandle, fsinfo); 4237 err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
4238 trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); 4238 trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
4239 if (err == 0) { 4239 if (err == 0) {
4240 struct nfs_client *clp = server->nfs_client; 4240 nfs4_set_lease_period(server->nfs_client,
4241 4241 fsinfo->lease_time * HZ,
4242 spin_lock(&clp->cl_lock); 4242 now);
4243 clp->cl_lease_time = fsinfo->lease_time * HZ;
4244 clp->cl_last_renewal = now;
4245 spin_unlock(&clp->cl_lock);
4246 break; 4243 break;
4247 } 4244 }
4248 err = nfs4_handle_exception(server, err, &exception); 4245 err = nfs4_handle_exception(server, err, &exception);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index e1ba58c3d1ad..82e77198d17e 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -136,6 +136,26 @@ nfs4_kill_renewd(struct nfs_client *clp)
136 cancel_delayed_work_sync(&clp->cl_renewd); 136 cancel_delayed_work_sync(&clp->cl_renewd);
137} 137}
138 138
139/**
140 * nfs4_set_lease_period - Sets the lease period on a nfs_client
141 *
142 * @clp: pointer to nfs_client
143 * @lease: new value for lease period
144 * @lastrenewed: time at which lease was last renewed
145 */
146void nfs4_set_lease_period(struct nfs_client *clp,
147 unsigned long lease,
148 unsigned long lastrenewed)
149{
150 spin_lock(&clp->cl_lock);
151 clp->cl_lease_time = lease;
152 clp->cl_last_renewal = lastrenewed;
153 spin_unlock(&clp->cl_lock);
154
155 /* Cap maximum reconnect timeout at 1/2 lease period */
156 rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
157}
158
139/* 159/*
140 * Local variables: 160 * Local variables:
141 * c-basic-offset: 8 161 * c-basic-offset: 8
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 834b875900d6..cada00aa5096 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -277,20 +277,17 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
277{ 277{
278 int status; 278 int status;
279 struct nfs_fsinfo fsinfo; 279 struct nfs_fsinfo fsinfo;
280 unsigned long now;
280 281
281 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { 282 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
282 nfs4_schedule_state_renewal(clp); 283 nfs4_schedule_state_renewal(clp);
283 return 0; 284 return 0;
284 } 285 }
285 286
287 now = jiffies;
286 status = nfs4_proc_get_lease_time(clp, &fsinfo); 288 status = nfs4_proc_get_lease_time(clp, &fsinfo);
287 if (status == 0) { 289 if (status == 0) {
288 /* Update lease time and schedule renewal */ 290 nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
289 spin_lock(&clp->cl_lock);
290 clp->cl_lease_time = fsinfo.lease_time * HZ;
291 clp->cl_last_renewal = jiffies;
292 spin_unlock(&clp->cl_lock);
293
294 nfs4_schedule_state_renewal(clp); 291 nfs4_schedule_state_renewal(clp);
295 } 292 }
296 293
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8410ca275db1..a204d7e109d4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4903,6 +4903,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4903 return nfs_ok; 4903 return nfs_ok;
4904} 4904}
4905 4905
4906static __be32
4907nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
4908{
4909 struct nfs4_ol_stateid *stp = openlockstateid(s);
4910 __be32 ret;
4911
4912 mutex_lock(&stp->st_mutex);
4913
4914 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
4915 if (ret)
4916 goto out;
4917
4918 ret = nfserr_locks_held;
4919 if (check_for_locks(stp->st_stid.sc_file,
4920 lockowner(stp->st_stateowner)))
4921 goto out;
4922
4923 release_lock_stateid(stp);
4924 ret = nfs_ok;
4925
4926out:
4927 mutex_unlock(&stp->st_mutex);
4928 nfs4_put_stid(s);
4929 return ret;
4930}
4931
4906__be32 4932__be32
4907nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 4933nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4908 struct nfsd4_free_stateid *free_stateid) 4934 struct nfsd4_free_stateid *free_stateid)
@@ -4910,7 +4936,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4910 stateid_t *stateid = &free_stateid->fr_stateid; 4936 stateid_t *stateid = &free_stateid->fr_stateid;
4911 struct nfs4_stid *s; 4937 struct nfs4_stid *s;
4912 struct nfs4_delegation *dp; 4938 struct nfs4_delegation *dp;
4913 struct nfs4_ol_stateid *stp;
4914 struct nfs4_client *cl = cstate->session->se_client; 4939 struct nfs4_client *cl = cstate->session->se_client;
4915 __be32 ret = nfserr_bad_stateid; 4940 __be32 ret = nfserr_bad_stateid;
4916 4941
@@ -4929,18 +4954,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4929 ret = nfserr_locks_held; 4954 ret = nfserr_locks_held;
4930 break; 4955 break;
4931 case NFS4_LOCK_STID: 4956 case NFS4_LOCK_STID:
4932 ret = check_stateid_generation(stateid, &s->sc_stateid, 1); 4957 atomic_inc(&s->sc_count);
4933 if (ret)
4934 break;
4935 stp = openlockstateid(s);
4936 ret = nfserr_locks_held;
4937 if (check_for_locks(stp->st_stid.sc_file,
4938 lockowner(stp->st_stateowner)))
4939 break;
4940 WARN_ON(!unhash_lock_stateid(stp));
4941 spin_unlock(&cl->cl_lock); 4958 spin_unlock(&cl->cl_lock);
4942 nfs4_put_stid(s); 4959 ret = nfsd4_free_lock_stateid(stateid, s);
4943 ret = nfs_ok;
4944 goto out; 4960 goto out;
4945 case NFS4_REVOKED_DELEG_STID: 4961 case NFS4_REVOKED_DELEG_STID:
4946 dp = delegstateid(s); 4962 dp = delegstateid(s);
@@ -5507,7 +5523,7 @@ static __be32
5507lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, 5523lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5508 struct nfs4_ol_stateid *ost, 5524 struct nfs4_ol_stateid *ost,
5509 struct nfsd4_lock *lock, 5525 struct nfsd4_lock *lock,
5510 struct nfs4_ol_stateid **lst, bool *new) 5526 struct nfs4_ol_stateid **plst, bool *new)
5511{ 5527{
5512 __be32 status; 5528 __be32 status;
5513 struct nfs4_file *fi = ost->st_stid.sc_file; 5529 struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5515,7 +5531,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5515 struct nfs4_client *cl = oo->oo_owner.so_client; 5531 struct nfs4_client *cl = oo->oo_owner.so_client;
5516 struct inode *inode = d_inode(cstate->current_fh.fh_dentry); 5532 struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
5517 struct nfs4_lockowner *lo; 5533 struct nfs4_lockowner *lo;
5534 struct nfs4_ol_stateid *lst;
5518 unsigned int strhashval; 5535 unsigned int strhashval;
5536 bool hashed;
5519 5537
5520 lo = find_lockowner_str(cl, &lock->lk_new_owner); 5538 lo = find_lockowner_str(cl, &lock->lk_new_owner);
5521 if (!lo) { 5539 if (!lo) {
@@ -5531,12 +5549,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5531 goto out; 5549 goto out;
5532 } 5550 }
5533 5551
5534 *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); 5552retry:
5535 if (*lst == NULL) { 5553 lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
5554 if (lst == NULL) {
5536 status = nfserr_jukebox; 5555 status = nfserr_jukebox;
5537 goto out; 5556 goto out;
5538 } 5557 }
5558
5559 mutex_lock(&lst->st_mutex);
5560
5561 /* See if it's still hashed to avoid race with FREE_STATEID */
5562 spin_lock(&cl->cl_lock);
5563 hashed = !list_empty(&lst->st_perfile);
5564 spin_unlock(&cl->cl_lock);
5565
5566 if (!hashed) {
5567 mutex_unlock(&lst->st_mutex);
5568 nfs4_put_stid(&lst->st_stid);
5569 goto retry;
5570 }
5539 status = nfs_ok; 5571 status = nfs_ok;
5572 *plst = lst;
5540out: 5573out:
5541 nfs4_put_stateowner(&lo->lo_owner); 5574 nfs4_put_stateowner(&lo->lo_owner);
5542 return status; 5575 return status;
@@ -5603,8 +5636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5603 goto out; 5636 goto out;
5604 status = lookup_or_create_lock_state(cstate, open_stp, lock, 5637 status = lookup_or_create_lock_state(cstate, open_stp, lock,
5605 &lock_stp, &new); 5638 &lock_stp, &new);
5606 if (status == nfs_ok)
5607 mutex_lock(&lock_stp->st_mutex);
5608 } else { 5639 } else {
5609 status = nfs4_preprocess_seqid_op(cstate, 5640 status = nfs4_preprocess_seqid_op(cstate,
5610 lock->lk_old_lock_seqid, 5641 lock->lk_old_lock_seqid,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ba944123167b..ff476e654b8f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1252,10 +1252,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1252 if (IS_ERR(dchild)) 1252 if (IS_ERR(dchild))
1253 return nfserrno(host_err); 1253 return nfserrno(host_err);
1254 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1254 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1255 if (err) { 1255 /*
1256 dput(dchild); 1256 * We unconditionally drop our ref to dchild as fh_compose will have
1257 * already grabbed its own ref for it.
1258 */
1259 dput(dchild);
1260 if (err)
1257 return err; 1261 return err;
1258 }
1259 return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, 1262 return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
1260 rdev, resfhp); 1263 rdev, resfhp);
1261} 1264}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 09e18fdf61e5..b9a8c813e5e6 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -46,7 +46,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
46 cached = 0; 46 cached = 0;
47 47
48 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) 48 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
49 pages[lru] = global_page_state(NR_LRU_BASE + lru); 49 pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
50 50
51 available = si_mem_available(); 51 available = si_mem_available();
52 52
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 19f532e7d35e..6dc4296eed62 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
223 size -= n; 223 size -= n;
224 buf += n; 224 buf += n;
225 copied += n; 225 copied += n;
226 if (!m->count) 226 if (!m->count) {
227 m->from = 0;
227 m->index++; 228 m->index++;
229 }
228 if (!size) 230 if (!size)
229 goto Done; 231 goto Done;
230 } 232 }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index b45345d701e7..51157da3f76e 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
370 370
371 p = c->gap_lebs; 371 p = c->gap_lebs;
372 do { 372 do {
373 ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); 373 ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
374 written = layout_leb_in_gaps(c, p); 374 written = layout_leb_in_gaps(c, p);
375 if (written < 0) { 375 if (written < 0) {
376 err = written; 376 err = written;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e237811f09ce..11a004114eba 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler,
575 dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, 575 dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
576 inode->i_ino, dentry, size); 576 inode->i_ino, dentry, size);
577 577
578 return __ubifs_getxattr(inode, name, buffer, size); 578 name = xattr_full_name(handler, name);
579 return __ubifs_getxattr(inode, name, buffer, size);
579} 580}
580 581
581static int ubifs_xattr_set(const struct xattr_handler *handler, 582static int ubifs_xattr_set(const struct xattr_handler *handler,
@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler,
586 dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", 587 dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
587 name, inode->i_ino, dentry, size); 588 name, inode->i_ino, dentry, size);
588 589
590 name = xattr_full_name(handler, name);
591
589 if (value) 592 if (value)
590 return __ubifs_setxattr(inode, name, value, size, flags); 593 return __ubifs_setxattr(inode, name, value, size, flags);
591 else 594 else
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 776ae2f325d1..3dd8f1d54498 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small(
1582 xfs_extlen_t *flenp, /* result length */ 1582 xfs_extlen_t *flenp, /* result length */
1583 int *stat) /* status: 0-freelist, 1-normal/none */ 1583 int *stat) /* status: 0-freelist, 1-normal/none */
1584{ 1584{
1585 struct xfs_owner_info oinfo;
1585 int error; 1586 int error;
1586 xfs_agblock_t fbno; 1587 xfs_agblock_t fbno;
1587 xfs_extlen_t flen; 1588 xfs_extlen_t flen;
@@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small(
1624 error0); 1625 error0);
1625 args->wasfromfl = 1; 1626 args->wasfromfl = 1;
1626 trace_xfs_alloc_small_freelist(args); 1627 trace_xfs_alloc_small_freelist(args);
1628
1629 /*
1630 * If we're feeding an AGFL block to something that
1631 * doesn't live in the free space, we need to clear
1632 * out the OWN_AG rmap.
1633 */
1634 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1635 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1636 fbno, 1, &oinfo);
1637 if (error)
1638 goto error0;
1639
1627 *stat = 0; 1640 *stat = 0;
1628 return 0; 1641 return 0;
1629 } 1642 }
@@ -2264,6 +2277,7 @@ xfs_alloc_log_agf(
2264 offsetof(xfs_agf_t, agf_longest), 2277 offsetof(xfs_agf_t, agf_longest),
2265 offsetof(xfs_agf_t, agf_btreeblks), 2278 offsetof(xfs_agf_t, agf_btreeblks),
2266 offsetof(xfs_agf_t, agf_uuid), 2279 offsetof(xfs_agf_t, agf_uuid),
2280 offsetof(xfs_agf_t, agf_rmap_blocks),
2267 sizeof(xfs_agf_t) 2281 sizeof(xfs_agf_t)
2268 }; 2282 };
2269 2283
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f814d42c73b2..e6a8bea0f7ba 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -640,12 +640,15 @@ typedef struct xfs_agf {
640 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ 640 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
641 uuid_t agf_uuid; /* uuid of filesystem */ 641 uuid_t agf_uuid; /* uuid of filesystem */
642 642
643 __be32 agf_rmap_blocks; /* rmapbt blocks used */
644 __be32 agf_padding; /* padding */
645
643 /* 646 /*
644 * reserve some contiguous space for future logged fields before we add 647 * reserve some contiguous space for future logged fields before we add
645 * the unlogged fields. This makes the range logging via flags and 648 * the unlogged fields. This makes the range logging via flags and
646 * structure offsets much simpler. 649 * structure offsets much simpler.
647 */ 650 */
648 __be64 agf_spare64[16]; 651 __be64 agf_spare64[15];
649 652
650 /* unlogged fields, written during buffer writeback. */ 653 /* unlogged fields, written during buffer writeback. */
651 __be64 agf_lsn; /* last write sequence */ 654 __be64 agf_lsn; /* last write sequence */
@@ -670,7 +673,8 @@ typedef struct xfs_agf {
670#define XFS_AGF_LONGEST 0x00000400 673#define XFS_AGF_LONGEST 0x00000400
671#define XFS_AGF_BTREEBLKS 0x00000800 674#define XFS_AGF_BTREEBLKS 0x00000800
672#define XFS_AGF_UUID 0x00001000 675#define XFS_AGF_UUID 0x00001000
673#define XFS_AGF_NUM_BITS 13 676#define XFS_AGF_RMAP_BLOCKS 0x00002000
677#define XFS_AGF_NUM_BITS 14
674#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 678#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
675 679
676#define XFS_AGF_FLAGS \ 680#define XFS_AGF_FLAGS \
@@ -686,7 +690,8 @@ typedef struct xfs_agf {
686 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ 690 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
687 { XFS_AGF_LONGEST, "LONGEST" }, \ 691 { XFS_AGF_LONGEST, "LONGEST" }, \
688 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ 692 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
689 { XFS_AGF_UUID, "UUID" } 693 { XFS_AGF_UUID, "UUID" }, \
694 { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }
690 695
691/* disk block (xfs_daddr_t) in the AG */ 696/* disk block (xfs_daddr_t) in the AG */
692#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 697#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index bc1faebc84ec..17b8eeb34ac8 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block(
98 union xfs_btree_ptr *new, 98 union xfs_btree_ptr *new,
99 int *stat) 99 int *stat)
100{ 100{
101 struct xfs_buf *agbp = cur->bc_private.a.agbp;
102 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
101 int error; 103 int error;
102 xfs_agblock_t bno; 104 xfs_agblock_t bno;
103 105
@@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block(
124 126
125 xfs_trans_agbtree_delta(cur->bc_tp, 1); 127 xfs_trans_agbtree_delta(cur->bc_tp, 1);
126 new->s = cpu_to_be32(bno); 128 new->s = cpu_to_be32(bno);
129 be32_add_cpu(&agf->agf_rmap_blocks, 1);
130 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
127 131
128 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 132 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
129 *stat = 1; 133 *stat = 1;
@@ -143,6 +147,8 @@ xfs_rmapbt_free_block(
143 bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); 147 bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
144 trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, 148 trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
145 bno, 1); 149 bno, 1);
150 be32_add_cpu(&agf->agf_rmap_blocks, -1);
151 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
146 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); 152 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
147 if (error) 153 if (error)
148 return error; 154 return error;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 47a318ce82e0..607cc29bba21 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -115,7 +115,6 @@ xfs_buf_ioacct_dec(
115 if (!(bp->b_flags & _XBF_IN_FLIGHT)) 115 if (!(bp->b_flags & _XBF_IN_FLIGHT))
116 return; 116 return;
117 117
118 ASSERT(bp->b_flags & XBF_ASYNC);
119 bp->b_flags &= ~_XBF_IN_FLIGHT; 118 bp->b_flags &= ~_XBF_IN_FLIGHT;
120 percpu_counter_dec(&bp->b_target->bt_io_count); 119 percpu_counter_dec(&bp->b_target->bt_io_count);
121} 120}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ed95e5bb04e6..e612a0233710 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -741,9 +741,20 @@ xfs_file_dax_write(
741 * page is inserted into the pagecache when we have to serve a write 741 * page is inserted into the pagecache when we have to serve a write
742 * fault on a hole. It should never be dirtied and can simply be 742 * fault on a hole. It should never be dirtied and can simply be
743 * dropped from the pagecache once we get real data for the page. 743 * dropped from the pagecache once we get real data for the page.
744 *
745 * XXX: This is racy against mmap, and there's nothing we can do about
746 * it. dax_do_io() should really do this invalidation internally as
747 * it will know if we've allocated over a holei for this specific IO and
748 * if so it needs to update the mapping tree and invalidate existing
749 * PTEs over the newly allocated range. Remove this invalidation when
750 * dax_do_io() is fixed up.
744 */ 751 */
745 if (mapping->nrpages) { 752 if (mapping->nrpages) {
746 ret = invalidate_inode_pages2(mapping); 753 loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
754
755 ret = invalidate_inode_pages2_range(mapping,
756 iocb->ki_pos >> PAGE_SHIFT,
757 end >> PAGE_SHIFT);
747 WARN_ON_ONCE(ret); 758 WARN_ON_ONCE(ret);
748 } 759 }
749 760
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 0f96847b90e1..0b7f986745c1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -248,6 +248,7 @@ xfs_growfs_data_private(
248 agf->agf_roots[XFS_BTNUM_RMAPi] = 248 agf->agf_roots[XFS_BTNUM_RMAPi] =
249 cpu_to_be32(XFS_RMAP_BLOCK(mp)); 249 cpu_to_be32(XFS_RMAP_BLOCK(mp));
250 agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); 250 agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
251 agf->agf_rmap_blocks = cpu_to_be32(1);
251 } 252 }
252 253
253 agf->agf_flfirst = cpu_to_be32(1); 254 agf->agf_flfirst = cpu_to_be32(1);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2114d53df433..2af0dda1c978 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -715,12 +715,16 @@ xfs_iomap_write_allocate(
715 * is in the delayed allocation extent on which we sit 715 * is in the delayed allocation extent on which we sit
716 * but before our buffer starts. 716 * but before our buffer starts.
717 */ 717 */
718
719 nimaps = 0; 718 nimaps = 0;
720 while (nimaps == 0) { 719 while (nimaps == 0) {
721 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 720 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
722 721 /*
723 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres, 722 * We have already reserved space for the extent and any
723 * indirect blocks when creating the delalloc extent,
724 * there is no need to reserve space in this transaction
725 * again.
726 */
727 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
724 0, XFS_TRANS_RESERVE, &tp); 728 0, XFS_TRANS_RESERVE, &tp);
725 if (error) 729 if (error)
726 return error; 730 return error;
@@ -1037,20 +1041,14 @@ xfs_file_iomap_begin(
1037 return error; 1041 return error;
1038 1042
1039 trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); 1043 trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
1040 xfs_bmbt_to_iomap(ip, iomap, &imap);
1041 } else if (nimaps) {
1042 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1043 trace_xfs_iomap_found(ip, offset, length, 0, &imap);
1044 xfs_bmbt_to_iomap(ip, iomap, &imap);
1045 } else { 1044 } else {
1045 ASSERT(nimaps);
1046
1046 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1047 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1047 trace_xfs_iomap_not_found(ip, offset, length, 0, &imap); 1048 trace_xfs_iomap_found(ip, offset, length, 0, &imap);
1048 iomap->blkno = IOMAP_NULL_BLOCK;
1049 iomap->type = IOMAP_HOLE;
1050 iomap->offset = offset;
1051 iomap->length = length;
1052 } 1049 }
1053 1050
1051 xfs_bmbt_to_iomap(ip, iomap, &imap);
1054 return 0; 1052 return 0;
1055} 1053}
1056 1054
@@ -1112,3 +1110,48 @@ struct iomap_ops xfs_iomap_ops = {
1112 .iomap_begin = xfs_file_iomap_begin, 1110 .iomap_begin = xfs_file_iomap_begin,
1113 .iomap_end = xfs_file_iomap_end, 1111 .iomap_end = xfs_file_iomap_end,
1114}; 1112};
1113
1114static int
1115xfs_xattr_iomap_begin(
1116 struct inode *inode,
1117 loff_t offset,
1118 loff_t length,
1119 unsigned flags,
1120 struct iomap *iomap)
1121{
1122 struct xfs_inode *ip = XFS_I(inode);
1123 struct xfs_mount *mp = ip->i_mount;
1124 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
1125 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
1126 struct xfs_bmbt_irec imap;
1127 int nimaps = 1, error = 0;
1128 unsigned lockmode;
1129
1130 if (XFS_FORCED_SHUTDOWN(mp))
1131 return -EIO;
1132
1133 lockmode = xfs_ilock_data_map_shared(ip);
1134
1135 /* if there are no attribute fork or extents, return ENOENT */
1136 if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
1137 error = -ENOENT;
1138 goto out_unlock;
1139 }
1140
1141 ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
1142 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
1143 &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
1144out_unlock:
1145 xfs_iunlock(ip, lockmode);
1146
1147 if (!error) {
1148 ASSERT(nimaps);
1149 xfs_bmbt_to_iomap(ip, iomap, &imap);
1150 }
1151
1152 return error;
1153}
1154
1155struct iomap_ops xfs_xattr_iomap_ops = {
1156 .iomap_begin = xfs_xattr_iomap_begin,
1157};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index e066d045e2ff..fb8aca3d69ab 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
35 struct xfs_bmbt_irec *); 35 struct xfs_bmbt_irec *);
36 36
37extern struct iomap_ops xfs_iomap_ops; 37extern struct iomap_ops xfs_iomap_ops;
38extern struct iomap_ops xfs_xattr_iomap_ops;
38 39
39#endif /* __XFS_IOMAP_H__*/ 40#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab820f84ed50..b24c3102fa93 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1009,7 +1009,14 @@ xfs_vn_fiemap(
1009 int error; 1009 int error;
1010 1010
1011 xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); 1011 xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
1012 error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops); 1012 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1013 fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
1014 error = iomap_fiemap(inode, fieinfo, start, length,
1015 &xfs_xattr_iomap_ops);
1016 } else {
1017 error = iomap_fiemap(inode, fieinfo, start, length,
1018 &xfs_iomap_ops);
1019 }
1013 xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); 1020 xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
1014 1021
1015 return error; 1022 return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 551b7e26980c..7e88bec3f359 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1298DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); 1298DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
1299DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 1299DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
1300DEFINE_IOMAP_EVENT(xfs_iomap_found); 1300DEFINE_IOMAP_EVENT(xfs_iomap_found);
1301DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
1302 1301
1303DECLARE_EVENT_CLASS(xfs_simple_io_class, 1302DECLARE_EVENT_CLASS(xfs_simple_io_class,
1304 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1303 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),