diff options
Diffstat (limited to 'fs')
119 files changed, 2206 insertions, 1043 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4b0eff6da674..85737e96ab8b 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c | |||
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
189 | case 1: | 189 | case 1: |
190 | _debug("extract FID count"); | 190 | _debug("extract FID count"); |
191 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 191 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
192 | switch (ret) { | 192 | if (ret < 0) |
193 | case 0: break; | 193 | return ret; |
194 | case -EAGAIN: return 0; | ||
195 | default: return ret; | ||
196 | } | ||
197 | 194 | ||
198 | call->count = ntohl(call->tmp); | 195 | call->count = ntohl(call->tmp); |
199 | _debug("FID count: %u", call->count); | 196 | _debug("FID count: %u", call->count); |
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
210 | _debug("extract FID array"); | 207 | _debug("extract FID array"); |
211 | ret = afs_extract_data(call, skb, last, call->buffer, | 208 | ret = afs_extract_data(call, skb, last, call->buffer, |
212 | call->count * 3 * 4); | 209 | call->count * 3 * 4); |
213 | switch (ret) { | 210 | if (ret < 0) |
214 | case 0: break; | 211 | return ret; |
215 | case -EAGAIN: return 0; | ||
216 | default: return ret; | ||
217 | } | ||
218 | 212 | ||
219 | _debug("unmarshall FID array"); | 213 | _debug("unmarshall FID array"); |
220 | call->request = kcalloc(call->count, | 214 | call->request = kcalloc(call->count, |
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
239 | case 3: | 233 | case 3: |
240 | _debug("extract CB count"); | 234 | _debug("extract CB count"); |
241 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 235 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
242 | switch (ret) { | 236 | if (ret < 0) |
243 | case 0: break; | 237 | return ret; |
244 | case -EAGAIN: return 0; | ||
245 | default: return ret; | ||
246 | } | ||
247 | 238 | ||
248 | tmp = ntohl(call->tmp); | 239 | tmp = ntohl(call->tmp); |
249 | _debug("CB count: %u", tmp); | 240 | _debug("CB count: %u", tmp); |
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
258 | _debug("extract CB array"); | 249 | _debug("extract CB array"); |
259 | ret = afs_extract_data(call, skb, last, call->request, | 250 | ret = afs_extract_data(call, skb, last, call->request, |
260 | call->count * 3 * 4); | 251 | call->count * 3 * 4); |
261 | switch (ret) { | 252 | if (ret < 0) |
262 | case 0: break; | 253 | return ret; |
263 | case -EAGAIN: return 0; | ||
264 | default: return ret; | ||
265 | } | ||
266 | 254 | ||
267 | _debug("unmarshall CB array"); | 255 | _debug("unmarshall CB array"); |
268 | cb = call->request; | 256 | cb = call->request; |
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
278 | call->unmarshall++; | 266 | call->unmarshall++; |
279 | 267 | ||
280 | case 5: | 268 | case 5: |
281 | _debug("trailer"); | 269 | ret = afs_data_complete(call, skb, last); |
282 | if (skb->len != 0) | 270 | if (ret < 0) |
283 | return -EBADMSG; | 271 | return ret; |
284 | 272 | ||
285 | /* Record that the message was unmarshalled successfully so | 273 | /* Record that the message was unmarshalled successfully so |
286 | * that the call destructor can know do the callback breaking | 274 | * that the call destructor can know do the callback breaking |
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
294 | break; | 282 | break; |
295 | } | 283 | } |
296 | 284 | ||
297 | if (!last) | ||
298 | return 0; | ||
299 | 285 | ||
300 | call->state = AFS_CALL_REPLYING; | 286 | call->state = AFS_CALL_REPLYING; |
301 | 287 | ||
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, | |||
335 | { | 321 | { |
336 | struct afs_server *server; | 322 | struct afs_server *server; |
337 | struct in_addr addr; | 323 | struct in_addr addr; |
324 | int ret; | ||
338 | 325 | ||
339 | _enter(",{%u},%d", skb->len, last); | 326 | _enter(",{%u},%d", skb->len, last); |
340 | 327 | ||
341 | if (skb->len > 0) | 328 | ret = afs_data_complete(call, skb, last); |
342 | return -EBADMSG; | 329 | if (ret < 0) |
343 | if (!last) | 330 | return ret; |
344 | return 0; | ||
345 | 331 | ||
346 | /* no unmarshalling required */ | 332 | /* no unmarshalling required */ |
347 | call->state = AFS_CALL_REPLYING; | 333 | call->state = AFS_CALL_REPLYING; |
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, | |||
371 | 357 | ||
372 | _enter(",{%u},%d", skb->len, last); | 358 | _enter(",{%u},%d", skb->len, last); |
373 | 359 | ||
360 | /* There are some arguments that we ignore */ | ||
361 | afs_data_consumed(call, skb); | ||
374 | if (!last) | 362 | if (!last) |
375 | return 0; | 363 | return -EAGAIN; |
376 | 364 | ||
377 | /* no unmarshalling required */ | 365 | /* no unmarshalling required */ |
378 | call->state = AFS_CALL_REPLYING; | 366 | call->state = AFS_CALL_REPLYING; |
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) | |||
408 | static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, | 396 | static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, |
409 | bool last) | 397 | bool last) |
410 | { | 398 | { |
399 | int ret; | ||
400 | |||
411 | _enter(",{%u},%d", skb->len, last); | 401 | _enter(",{%u},%d", skb->len, last); |
412 | 402 | ||
413 | if (skb->len > 0) | 403 | ret = afs_data_complete(call, skb, last); |
414 | return -EBADMSG; | 404 | if (ret < 0) |
415 | if (!last) | 405 | return ret; |
416 | return 0; | ||
417 | 406 | ||
418 | /* no unmarshalling required */ | 407 | /* no unmarshalling required */ |
419 | call->state = AFS_CALL_REPLYING; | 408 | call->state = AFS_CALL_REPLYING; |
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, | |||
460 | 449 | ||
461 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 450 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
462 | 451 | ||
463 | if (skb->len > 0) | 452 | ret = afs_data_complete(call, skb, last); |
464 | return -EBADMSG; | 453 | if (ret < 0) |
465 | if (!last) | 454 | return ret; |
466 | return 0; | ||
467 | 455 | ||
468 | switch (call->unmarshall) { | 456 | switch (call->unmarshall) { |
469 | case 0: | 457 | case 0: |
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, | |||
509 | break; | 497 | break; |
510 | } | 498 | } |
511 | 499 | ||
512 | if (!last) | 500 | ret = afs_data_complete(call, skb, last); |
513 | return 0; | 501 | if (ret < 0) |
502 | return ret; | ||
514 | 503 | ||
515 | call->state = AFS_CALL_REPLYING; | 504 | call->state = AFS_CALL_REPLYING; |
516 | 505 | ||
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) | |||
588 | static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, | 577 | static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, |
589 | struct sk_buff *skb, bool last) | 578 | struct sk_buff *skb, bool last) |
590 | { | 579 | { |
580 | int ret; | ||
581 | |||
591 | _enter(",{%u},%d", skb->len, last); | 582 | _enter(",{%u},%d", skb->len, last); |
592 | 583 | ||
593 | if (skb->len > 0) | 584 | ret = afs_data_complete(call, skb, last); |
594 | return -EBADMSG; | 585 | if (ret < 0) |
595 | if (!last) | 586 | return ret; |
596 | return 0; | ||
597 | 587 | ||
598 | /* no unmarshalling required */ | 588 | /* no unmarshalling required */ |
599 | call->state = AFS_CALL_REPLYING; | 589 | call->state = AFS_CALL_REPLYING; |
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c2e930ec2888..9312b92e54be 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c | |||
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call, | |||
240 | { | 240 | { |
241 | struct afs_vnode *vnode = call->reply; | 241 | struct afs_vnode *vnode = call->reply; |
242 | const __be32 *bp; | 242 | const __be32 *bp; |
243 | int ret; | ||
243 | 244 | ||
244 | _enter(",,%u", last); | 245 | _enter(",,%u", last); |
245 | 246 | ||
246 | afs_transfer_reply(call, skb); | 247 | ret = afs_transfer_reply(call, skb, last); |
247 | if (!last) | 248 | if (ret < 0) |
248 | return 0; | 249 | return ret; |
249 | |||
250 | if (call->reply_size != call->reply_max) | ||
251 | return -EBADMSG; | ||
252 | 250 | ||
253 | /* unmarshall the reply once we've received all of it */ | 251 | /* unmarshall the reply once we've received all of it */ |
254 | bp = call->buffer; | 252 | bp = call->buffer; |
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
335 | case 1: | 333 | case 1: |
336 | _debug("extract data length (MSW)"); | 334 | _debug("extract data length (MSW)"); |
337 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 335 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
338 | switch (ret) { | 336 | if (ret < 0) |
339 | case 0: break; | 337 | return ret; |
340 | case -EAGAIN: return 0; | ||
341 | default: return ret; | ||
342 | } | ||
343 | 338 | ||
344 | call->count = ntohl(call->tmp); | 339 | call->count = ntohl(call->tmp); |
345 | _debug("DATA length MSW: %u", call->count); | 340 | _debug("DATA length MSW: %u", call->count); |
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
353 | case 2: | 348 | case 2: |
354 | _debug("extract data length"); | 349 | _debug("extract data length"); |
355 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 350 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
356 | switch (ret) { | 351 | if (ret < 0) |
357 | case 0: break; | 352 | return ret; |
358 | case -EAGAIN: return 0; | ||
359 | default: return ret; | ||
360 | } | ||
361 | 353 | ||
362 | call->count = ntohl(call->tmp); | 354 | call->count = ntohl(call->tmp); |
363 | _debug("DATA length: %u", call->count); | 355 | _debug("DATA length: %u", call->count); |
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
375 | ret = afs_extract_data(call, skb, last, buffer, | 367 | ret = afs_extract_data(call, skb, last, buffer, |
376 | call->count); | 368 | call->count); |
377 | kunmap_atomic(buffer); | 369 | kunmap_atomic(buffer); |
378 | switch (ret) { | 370 | if (ret < 0) |
379 | case 0: break; | 371 | return ret; |
380 | case -EAGAIN: return 0; | ||
381 | default: return ret; | ||
382 | } | ||
383 | } | 372 | } |
384 | 373 | ||
385 | call->offset = 0; | 374 | call->offset = 0; |
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
389 | case 4: | 378 | case 4: |
390 | ret = afs_extract_data(call, skb, last, call->buffer, | 379 | ret = afs_extract_data(call, skb, last, call->buffer, |
391 | (21 + 3 + 6) * 4); | 380 | (21 + 3 + 6) * 4); |
392 | switch (ret) { | 381 | if (ret < 0) |
393 | case 0: break; | 382 | return ret; |
394 | case -EAGAIN: return 0; | ||
395 | default: return ret; | ||
396 | } | ||
397 | 383 | ||
398 | bp = call->buffer; | 384 | bp = call->buffer; |
399 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); | 385 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
405 | call->unmarshall++; | 391 | call->unmarshall++; |
406 | 392 | ||
407 | case 5: | 393 | case 5: |
408 | _debug("trailer"); | 394 | ret = afs_data_complete(call, skb, last); |
409 | if (skb->len != 0) | 395 | if (ret < 0) |
410 | return -EBADMSG; | 396 | return ret; |
411 | break; | 397 | break; |
412 | } | 398 | } |
413 | 399 | ||
414 | if (!last) | ||
415 | return 0; | ||
416 | |||
417 | if (call->count < PAGE_SIZE) { | 400 | if (call->count < PAGE_SIZE) { |
418 | _debug("clear"); | 401 | _debug("clear"); |
419 | page = call->reply3; | 402 | page = call->reply3; |
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, | |||
537 | { | 520 | { |
538 | _enter(",{%u},%d", skb->len, last); | 521 | _enter(",{%u},%d", skb->len, last); |
539 | 522 | ||
540 | if (skb->len > 0) | 523 | /* shouldn't be any reply data */ |
541 | return -EBADMSG; /* shouldn't be any reply data */ | 524 | return afs_data_complete(call, skb, last); |
542 | return 0; | ||
543 | } | 525 | } |
544 | 526 | ||
545 | /* | 527 | /* |
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, | |||
622 | { | 604 | { |
623 | struct afs_vnode *vnode = call->reply; | 605 | struct afs_vnode *vnode = call->reply; |
624 | const __be32 *bp; | 606 | const __be32 *bp; |
607 | int ret; | ||
625 | 608 | ||
626 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 609 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
627 | 610 | ||
628 | afs_transfer_reply(call, skb); | 611 | ret = afs_transfer_reply(call, skb, last); |
629 | if (!last) | 612 | if (ret < 0) |
630 | return 0; | 613 | return ret; |
631 | |||
632 | if (call->reply_size != call->reply_max) | ||
633 | return -EBADMSG; | ||
634 | 614 | ||
635 | /* unmarshall the reply once we've received all of it */ | 615 | /* unmarshall the reply once we've received all of it */ |
636 | bp = call->buffer; | 616 | bp = call->buffer; |
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call, | |||
721 | { | 701 | { |
722 | struct afs_vnode *vnode = call->reply; | 702 | struct afs_vnode *vnode = call->reply; |
723 | const __be32 *bp; | 703 | const __be32 *bp; |
704 | int ret; | ||
724 | 705 | ||
725 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 706 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
726 | 707 | ||
727 | afs_transfer_reply(call, skb); | 708 | ret = afs_transfer_reply(call, skb, last); |
728 | if (!last) | 709 | if (ret < 0) |
729 | return 0; | 710 | return ret; |
730 | |||
731 | if (call->reply_size != call->reply_max) | ||
732 | return -EBADMSG; | ||
733 | 711 | ||
734 | /* unmarshall the reply once we've received all of it */ | 712 | /* unmarshall the reply once we've received all of it */ |
735 | bp = call->buffer; | 713 | bp = call->buffer; |
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call, | |||
804 | { | 782 | { |
805 | struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; | 783 | struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; |
806 | const __be32 *bp; | 784 | const __be32 *bp; |
785 | int ret; | ||
807 | 786 | ||
808 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 787 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
809 | 788 | ||
810 | afs_transfer_reply(call, skb); | 789 | ret = afs_transfer_reply(call, skb, last); |
811 | if (!last) | 790 | if (ret < 0) |
812 | return 0; | 791 | return ret; |
813 | |||
814 | if (call->reply_size != call->reply_max) | ||
815 | return -EBADMSG; | ||
816 | 792 | ||
817 | /* unmarshall the reply once we've received all of it */ | 793 | /* unmarshall the reply once we've received all of it */ |
818 | bp = call->buffer; | 794 | bp = call->buffer; |
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call, | |||
892 | { | 868 | { |
893 | struct afs_vnode *vnode = call->reply; | 869 | struct afs_vnode *vnode = call->reply; |
894 | const __be32 *bp; | 870 | const __be32 *bp; |
871 | int ret; | ||
895 | 872 | ||
896 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 873 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
897 | 874 | ||
898 | afs_transfer_reply(call, skb); | 875 | ret = afs_transfer_reply(call, skb, last); |
899 | if (!last) | 876 | if (ret < 0) |
900 | return 0; | 877 | return ret; |
901 | |||
902 | if (call->reply_size != call->reply_max) | ||
903 | return -EBADMSG; | ||
904 | 878 | ||
905 | /* unmarshall the reply once we've received all of it */ | 879 | /* unmarshall the reply once we've received all of it */ |
906 | bp = call->buffer; | 880 | bp = call->buffer; |
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call, | |||
999 | { | 973 | { |
1000 | struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; | 974 | struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; |
1001 | const __be32 *bp; | 975 | const __be32 *bp; |
976 | int ret; | ||
1002 | 977 | ||
1003 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 978 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
1004 | 979 | ||
1005 | afs_transfer_reply(call, skb); | 980 | ret = afs_transfer_reply(call, skb, last); |
1006 | if (!last) | 981 | if (ret < 0) |
1007 | return 0; | 982 | return ret; |
1008 | |||
1009 | if (call->reply_size != call->reply_max) | ||
1010 | return -EBADMSG; | ||
1011 | 983 | ||
1012 | /* unmarshall the reply once we've received all of it */ | 984 | /* unmarshall the reply once we've received all of it */ |
1013 | bp = call->buffer; | 985 | bp = call->buffer; |
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call, | |||
1105 | { | 1077 | { |
1106 | struct afs_vnode *vnode = call->reply; | 1078 | struct afs_vnode *vnode = call->reply; |
1107 | const __be32 *bp; | 1079 | const __be32 *bp; |
1080 | int ret; | ||
1108 | 1081 | ||
1109 | _enter(",,%u", last); | 1082 | _enter(",,%u", last); |
1110 | 1083 | ||
1111 | afs_transfer_reply(call, skb); | 1084 | ret = afs_transfer_reply(call, skb, last); |
1112 | if (!last) { | 1085 | if (ret < 0) |
1113 | _leave(" = 0 [more]"); | 1086 | return ret; |
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | if (call->reply_size != call->reply_max) { | ||
1118 | _leave(" = -EBADMSG [%u != %u]", | ||
1119 | call->reply_size, call->reply_max); | ||
1120 | return -EBADMSG; | ||
1121 | } | ||
1122 | 1087 | ||
1123 | /* unmarshall the reply once we've received all of it */ | 1088 | /* unmarshall the reply once we've received all of it */ |
1124 | bp = call->buffer; | 1089 | bp = call->buffer; |
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call, | |||
1292 | afs_dataversion_t *store_version; | 1257 | afs_dataversion_t *store_version; |
1293 | struct afs_vnode *vnode = call->reply; | 1258 | struct afs_vnode *vnode = call->reply; |
1294 | const __be32 *bp; | 1259 | const __be32 *bp; |
1260 | int ret; | ||
1295 | 1261 | ||
1296 | _enter(",,%u", last); | 1262 | _enter(",,%u", last); |
1297 | 1263 | ||
1298 | afs_transfer_reply(call, skb); | 1264 | ret = afs_transfer_reply(call, skb, last); |
1299 | if (!last) { | 1265 | if (ret < 0) |
1300 | _leave(" = 0 [more]"); | 1266 | return ret; |
1301 | return 0; | ||
1302 | } | ||
1303 | |||
1304 | if (call->reply_size != call->reply_max) { | ||
1305 | _leave(" = -EBADMSG [%u != %u]", | ||
1306 | call->reply_size, call->reply_max); | ||
1307 | return -EBADMSG; | ||
1308 | } | ||
1309 | 1267 | ||
1310 | /* unmarshall the reply once we've received all of it */ | 1268 | /* unmarshall the reply once we've received all of it */ |
1311 | store_version = NULL; | 1269 | store_version = NULL; |
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1504 | _debug("extract status"); | 1462 | _debug("extract status"); |
1505 | ret = afs_extract_data(call, skb, last, call->buffer, | 1463 | ret = afs_extract_data(call, skb, last, call->buffer, |
1506 | 12 * 4); | 1464 | 12 * 4); |
1507 | switch (ret) { | 1465 | if (ret < 0) |
1508 | case 0: break; | 1466 | return ret; |
1509 | case -EAGAIN: return 0; | ||
1510 | default: return ret; | ||
1511 | } | ||
1512 | 1467 | ||
1513 | bp = call->buffer; | 1468 | bp = call->buffer; |
1514 | xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); | 1469 | xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); |
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1518 | /* extract the volume name length */ | 1473 | /* extract the volume name length */ |
1519 | case 2: | 1474 | case 2: |
1520 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 1475 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
1521 | switch (ret) { | 1476 | if (ret < 0) |
1522 | case 0: break; | 1477 | return ret; |
1523 | case -EAGAIN: return 0; | ||
1524 | default: return ret; | ||
1525 | } | ||
1526 | 1478 | ||
1527 | call->count = ntohl(call->tmp); | 1479 | call->count = ntohl(call->tmp); |
1528 | _debug("volname length: %u", call->count); | 1480 | _debug("volname length: %u", call->count); |
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1537 | if (call->count > 0) { | 1489 | if (call->count > 0) { |
1538 | ret = afs_extract_data(call, skb, last, call->reply3, | 1490 | ret = afs_extract_data(call, skb, last, call->reply3, |
1539 | call->count); | 1491 | call->count); |
1540 | switch (ret) { | 1492 | if (ret < 0) |
1541 | case 0: break; | 1493 | return ret; |
1542 | case -EAGAIN: return 0; | ||
1543 | default: return ret; | ||
1544 | } | ||
1545 | } | 1494 | } |
1546 | 1495 | ||
1547 | p = call->reply3; | 1496 | p = call->reply3; |
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1561 | case 4: | 1510 | case 4: |
1562 | ret = afs_extract_data(call, skb, last, call->buffer, | 1511 | ret = afs_extract_data(call, skb, last, call->buffer, |
1563 | call->count); | 1512 | call->count); |
1564 | switch (ret) { | 1513 | if (ret < 0) |
1565 | case 0: break; | 1514 | return ret; |
1566 | case -EAGAIN: return 0; | ||
1567 | default: return ret; | ||
1568 | } | ||
1569 | 1515 | ||
1570 | call->offset = 0; | 1516 | call->offset = 0; |
1571 | call->unmarshall++; | 1517 | call->unmarshall++; |
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1574 | /* extract the offline message length */ | 1520 | /* extract the offline message length */ |
1575 | case 5: | 1521 | case 5: |
1576 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 1522 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
1577 | switch (ret) { | 1523 | if (ret < 0) |
1578 | case 0: break; | 1524 | return ret; |
1579 | case -EAGAIN: return 0; | ||
1580 | default: return ret; | ||
1581 | } | ||
1582 | 1525 | ||
1583 | call->count = ntohl(call->tmp); | 1526 | call->count = ntohl(call->tmp); |
1584 | _debug("offline msg length: %u", call->count); | 1527 | _debug("offline msg length: %u", call->count); |
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1593 | if (call->count > 0) { | 1536 | if (call->count > 0) { |
1594 | ret = afs_extract_data(call, skb, last, call->reply3, | 1537 | ret = afs_extract_data(call, skb, last, call->reply3, |
1595 | call->count); | 1538 | call->count); |
1596 | switch (ret) { | 1539 | if (ret < 0) |
1597 | case 0: break; | 1540 | return ret; |
1598 | case -EAGAIN: return 0; | ||
1599 | default: return ret; | ||
1600 | } | ||
1601 | } | 1541 | } |
1602 | 1542 | ||
1603 | p = call->reply3; | 1543 | p = call->reply3; |
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1617 | case 7: | 1557 | case 7: |
1618 | ret = afs_extract_data(call, skb, last, call->buffer, | 1558 | ret = afs_extract_data(call, skb, last, call->buffer, |
1619 | call->count); | 1559 | call->count); |
1620 | switch (ret) { | 1560 | if (ret < 0) |
1621 | case 0: break; | 1561 | return ret; |
1622 | case -EAGAIN: return 0; | ||
1623 | default: return ret; | ||
1624 | } | ||
1625 | 1562 | ||
1626 | call->offset = 0; | 1563 | call->offset = 0; |
1627 | call->unmarshall++; | 1564 | call->unmarshall++; |
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1630 | /* extract the message of the day length */ | 1567 | /* extract the message of the day length */ |
1631 | case 8: | 1568 | case 8: |
1632 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); | 1569 | ret = afs_extract_data(call, skb, last, &call->tmp, 4); |
1633 | switch (ret) { | 1570 | if (ret < 0) |
1634 | case 0: break; | 1571 | return ret; |
1635 | case -EAGAIN: return 0; | ||
1636 | default: return ret; | ||
1637 | } | ||
1638 | 1572 | ||
1639 | call->count = ntohl(call->tmp); | 1573 | call->count = ntohl(call->tmp); |
1640 | _debug("motd length: %u", call->count); | 1574 | _debug("motd length: %u", call->count); |
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1649 | if (call->count > 0) { | 1583 | if (call->count > 0) { |
1650 | ret = afs_extract_data(call, skb, last, call->reply3, | 1584 | ret = afs_extract_data(call, skb, last, call->reply3, |
1651 | call->count); | 1585 | call->count); |
1652 | switch (ret) { | 1586 | if (ret < 0) |
1653 | case 0: break; | 1587 | return ret; |
1654 | case -EAGAIN: return 0; | ||
1655 | default: return ret; | ||
1656 | } | ||
1657 | } | 1588 | } |
1658 | 1589 | ||
1659 | p = call->reply3; | 1590 | p = call->reply3; |
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, | |||
1673 | case 10: | 1604 | case 10: |
1674 | ret = afs_extract_data(call, skb, last, call->buffer, | 1605 | ret = afs_extract_data(call, skb, last, call->buffer, |
1675 | call->count); | 1606 | call->count); |
1676 | switch (ret) { | 1607 | if (ret < 0) |
1677 | case 0: break; | 1608 | return ret; |
1678 | case -EAGAIN: return 0; | ||
1679 | default: return ret; | ||
1680 | } | ||
1681 | 1609 | ||
1682 | call->offset = 0; | 1610 | call->offset = 0; |
1683 | call->unmarshall++; | 1611 | call->unmarshall++; |
1684 | no_motd_padding: | 1612 | no_motd_padding: |
1685 | 1613 | ||
1686 | case 11: | 1614 | case 11: |
1687 | _debug("trailer %d", skb->len); | 1615 | ret = afs_data_complete(call, skb, last); |
1688 | if (skb->len != 0) | 1616 | if (ret < 0) |
1689 | return -EBADMSG; | 1617 | return ret; |
1690 | break; | 1618 | break; |
1691 | } | 1619 | } |
1692 | 1620 | ||
1693 | if (!last) | ||
1694 | return 0; | ||
1695 | |||
1696 | _leave(" = 0 [done]"); | 1621 | _leave(" = 0 [done]"); |
1697 | return 0; | 1622 | return 0; |
1698 | } | 1623 | } |
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call, | |||
1764 | struct sk_buff *skb, bool last) | 1689 | struct sk_buff *skb, bool last) |
1765 | { | 1690 | { |
1766 | const __be32 *bp; | 1691 | const __be32 *bp; |
1692 | int ret; | ||
1767 | 1693 | ||
1768 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | 1694 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); |
1769 | 1695 | ||
1770 | afs_transfer_reply(call, skb); | 1696 | ret = afs_transfer_reply(call, skb, last); |
1771 | if (!last) | 1697 | if (ret < 0) |
1772 | return 0; | 1698 | return ret; |
1773 | |||
1774 | if (call->reply_size != call->reply_max) | ||
1775 | return -EBADMSG; | ||
1776 | 1699 | ||
1777 | /* unmarshall the reply once we've received all of it */ | 1700 | /* unmarshall the reply once we've received all of it */ |
1778 | bp = call->buffer; | 1701 | bp = call->buffer; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 71d5982312f3..df976b2a7f40 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *); | |||
609 | */ | 609 | */ |
610 | extern int afs_open_socket(void); | 610 | extern int afs_open_socket(void); |
611 | extern void afs_close_socket(void); | 611 | extern void afs_close_socket(void); |
612 | extern void afs_data_consumed(struct afs_call *, struct sk_buff *); | ||
612 | extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, | 613 | extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, |
613 | const struct afs_wait_mode *); | 614 | const struct afs_wait_mode *); |
614 | extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, | 615 | extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, |
615 | size_t, size_t); | 616 | size_t, size_t); |
616 | extern void afs_flat_call_destructor(struct afs_call *); | 617 | extern void afs_flat_call_destructor(struct afs_call *); |
617 | extern void afs_transfer_reply(struct afs_call *, struct sk_buff *); | 618 | extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); |
618 | extern void afs_send_empty_reply(struct afs_call *); | 619 | extern void afs_send_empty_reply(struct afs_call *); |
619 | extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); | 620 | extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); |
620 | extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, | 621 | extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, |
621 | size_t); | 622 | size_t); |
622 | 623 | ||
624 | static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, | ||
625 | bool last) | ||
626 | { | ||
627 | if (skb->len > 0) | ||
628 | return -EBADMSG; | ||
629 | afs_data_consumed(call, skb); | ||
630 | if (!last) | ||
631 | return -EAGAIN; | ||
632 | return 0; | ||
633 | } | ||
634 | |||
623 | /* | 635 | /* |
624 | * security.c | 636 | * security.c |
625 | */ | 637 | */ |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4832de84d52c..14d04c848465 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -150,10 +150,9 @@ void afs_close_socket(void) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * note that the data in a socket buffer is now delivered and that the buffer | 153 | * Note that the data in a socket buffer is now consumed. |
154 | * should be freed | ||
155 | */ | 154 | */ |
156 | static void afs_data_delivered(struct sk_buff *skb) | 155 | void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) |
157 | { | 156 | { |
158 | if (!skb) { | 157 | if (!skb) { |
159 | _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); | 158 | _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); |
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb) | |||
161 | } else { | 160 | } else { |
162 | _debug("DLVR %p{%u} [%d]", | 161 | _debug("DLVR %p{%u} [%d]", |
163 | skb, skb->mark, atomic_read(&afs_outstanding_skbs)); | 162 | skb, skb->mark, atomic_read(&afs_outstanding_skbs)); |
164 | if (atomic_dec_return(&afs_outstanding_skbs) == -1) | 163 | rxrpc_kernel_data_consumed(call->rxcall, skb); |
165 | BUG(); | ||
166 | rxrpc_kernel_data_delivered(skb); | ||
167 | } | 164 | } |
168 | } | 165 | } |
169 | 166 | ||
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call) | |||
489 | last = rxrpc_kernel_is_data_last(skb); | 486 | last = rxrpc_kernel_is_data_last(skb); |
490 | ret = call->type->deliver(call, skb, last); | 487 | ret = call->type->deliver(call, skb, last); |
491 | switch (ret) { | 488 | switch (ret) { |
489 | case -EAGAIN: | ||
490 | if (last) { | ||
491 | _debug("short data"); | ||
492 | goto unmarshal_error; | ||
493 | } | ||
494 | break; | ||
492 | case 0: | 495 | case 0: |
493 | if (last && | 496 | ASSERT(last); |
494 | call->state == AFS_CALL_AWAIT_REPLY) | 497 | if (call->state == AFS_CALL_AWAIT_REPLY) |
495 | call->state = AFS_CALL_COMPLETE; | 498 | call->state = AFS_CALL_COMPLETE; |
496 | break; | 499 | break; |
497 | case -ENOTCONN: | 500 | case -ENOTCONN: |
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call) | |||
501 | abort_code = RX_INVALID_OPERATION; | 504 | abort_code = RX_INVALID_OPERATION; |
502 | goto do_abort; | 505 | goto do_abort; |
503 | default: | 506 | default: |
507 | unmarshal_error: | ||
504 | abort_code = RXGEN_CC_UNMARSHAL; | 508 | abort_code = RXGEN_CC_UNMARSHAL; |
505 | if (call->state != AFS_CALL_AWAIT_REPLY) | 509 | if (call->state != AFS_CALL_AWAIT_REPLY) |
506 | abort_code = RXGEN_SS_UNMARSHAL; | 510 | abort_code = RXGEN_SS_UNMARSHAL; |
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call) | |||
511 | call->state = AFS_CALL_ERROR; | 515 | call->state = AFS_CALL_ERROR; |
512 | break; | 516 | break; |
513 | } | 517 | } |
514 | afs_data_delivered(skb); | 518 | break; |
515 | skb = NULL; | ||
516 | continue; | ||
517 | case RXRPC_SKB_MARK_FINAL_ACK: | 519 | case RXRPC_SKB_MARK_FINAL_ACK: |
518 | _debug("Rcv ACK"); | 520 | _debug("Rcv ACK"); |
519 | call->state = AFS_CALL_COMPLETE; | 521 | call->state = AFS_CALL_COMPLETE; |
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call) | |||
685 | } | 687 | } |
686 | 688 | ||
687 | /* | 689 | /* |
688 | * empty a socket buffer into a flat reply buffer | 690 | * Empty a socket buffer into a flat reply buffer. |
689 | */ | 691 | */ |
690 | void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) | 692 | int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) |
691 | { | 693 | { |
692 | size_t len = skb->len; | 694 | size_t len = skb->len; |
693 | 695 | ||
694 | if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0) | 696 | if (len > call->reply_max - call->reply_size) { |
695 | BUG(); | 697 | _leave(" = -EBADMSG [%zu > %u]", |
696 | call->reply_size += len; | 698 | len, call->reply_max - call->reply_size); |
699 | return -EBADMSG; | ||
700 | } | ||
701 | |||
702 | if (len > 0) { | ||
703 | if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, | ||
704 | len) < 0) | ||
705 | BUG(); | ||
706 | call->reply_size += len; | ||
707 | } | ||
708 | |||
709 | afs_data_consumed(call, skb); | ||
710 | if (!last) | ||
711 | return -EAGAIN; | ||
712 | |||
713 | if (call->reply_size != call->reply_max) { | ||
714 | _leave(" = -EBADMSG [%u != %u]", | ||
715 | call->reply_size, call->reply_max); | ||
716 | return -EBADMSG; | ||
717 | } | ||
718 | return 0; | ||
697 | } | 719 | } |
698 | 720 | ||
699 | /* | 721 | /* |
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work) | |||
745 | } | 767 | } |
746 | 768 | ||
747 | /* | 769 | /* |
748 | * grab the operation ID from an incoming cache manager call | 770 | * Grab the operation ID from an incoming cache manager call. The socket |
771 | * buffer is discarded on error or if we don't yet have sufficient data. | ||
749 | */ | 772 | */ |
750 | static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, | 773 | static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, |
751 | bool last) | 774 | bool last) |
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, | |||
766 | call->offset += len; | 789 | call->offset += len; |
767 | 790 | ||
768 | if (call->offset < 4) { | 791 | if (call->offset < 4) { |
769 | if (last) { | 792 | afs_data_consumed(call, skb); |
770 | _leave(" = -EBADMSG [op ID short]"); | 793 | _leave(" = -EAGAIN"); |
771 | return -EBADMSG; | 794 | return -EAGAIN; |
772 | } | ||
773 | _leave(" = 0 [incomplete]"); | ||
774 | return 0; | ||
775 | } | 795 | } |
776 | 796 | ||
777 | call->state = AFS_CALL_AWAIT_REQUEST; | 797 | call->state = AFS_CALL_AWAIT_REQUEST; |
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) | |||
855 | } | 875 | } |
856 | 876 | ||
857 | /* | 877 | /* |
858 | * extract a piece of data from the received data socket buffers | 878 | * Extract a piece of data from the received data socket buffers. |
859 | */ | 879 | */ |
860 | int afs_extract_data(struct afs_call *call, struct sk_buff *skb, | 880 | int afs_extract_data(struct afs_call *call, struct sk_buff *skb, |
861 | bool last, void *buf, size_t count) | 881 | bool last, void *buf, size_t count) |
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb, | |||
873 | call->offset += len; | 893 | call->offset += len; |
874 | 894 | ||
875 | if (call->offset < count) { | 895 | if (call->offset < count) { |
876 | if (last) { | 896 | afs_data_consumed(call, skb); |
877 | _leave(" = -EBADMSG [%d < %zu]", call->offset, count); | ||
878 | return -EBADMSG; | ||
879 | } | ||
880 | _leave(" = -EAGAIN"); | 897 | _leave(" = -EAGAIN"); |
881 | return -EAGAIN; | 898 | return -EAGAIN; |
882 | } | 899 | } |
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 340afd0cd182..f94d1abdc3eb 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c | |||
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, | |||
64 | struct afs_cache_vlocation *entry; | 64 | struct afs_cache_vlocation *entry; |
65 | __be32 *bp; | 65 | __be32 *bp; |
66 | u32 tmp; | 66 | u32 tmp; |
67 | int loop; | 67 | int loop, ret; |
68 | 68 | ||
69 | _enter(",,%u", last); | 69 | _enter(",,%u", last); |
70 | 70 | ||
71 | afs_transfer_reply(call, skb); | 71 | ret = afs_transfer_reply(call, skb, last); |
72 | if (!last) | 72 | if (ret < 0) |
73 | return 0; | 73 | return ret; |
74 | |||
75 | if (call->reply_size != call->reply_max) | ||
76 | return -EBADMSG; | ||
77 | 74 | ||
78 | /* unmarshall the reply once we've received all of it */ | 75 | /* unmarshall the reply once we've received all of it */ |
79 | entry = call->reply; | 76 | entry = call->reply; |
@@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type, | |||
239 | static const struct dentry_operations ops = { | 239 | static const struct dentry_operations ops = { |
240 | .d_dname = simple_dname, | 240 | .d_dname = simple_dname, |
241 | }; | 241 | }; |
242 | return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC); | 242 | struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, |
243 | AIO_RING_MAGIC); | ||
244 | |||
245 | if (!IS_ERR(root)) | ||
246 | root->d_sb->s_iflags |= SB_I_NOEXEC; | ||
247 | return root; | ||
243 | } | 248 | } |
244 | 249 | ||
245 | /* aio_setup | 250 | /* aio_setup |
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b493909e7492..d8e6d421c27f 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
417 | } | 417 | } |
418 | return NULL; | 418 | return NULL; |
419 | } | 419 | } |
420 | |||
420 | /* | 421 | /* |
421 | * Find an eligible tree to time-out | 422 | * Find an eligible tree to time-out |
422 | * A tree is eligible if :- | 423 | * A tree is eligible if :- |
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
432 | struct dentry *root = sb->s_root; | 433 | struct dentry *root = sb->s_root; |
433 | struct dentry *dentry; | 434 | struct dentry *dentry; |
434 | struct dentry *expired; | 435 | struct dentry *expired; |
436 | struct dentry *found; | ||
435 | struct autofs_info *ino; | 437 | struct autofs_info *ino; |
436 | 438 | ||
437 | if (!root) | 439 | if (!root) |
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
442 | 444 | ||
443 | dentry = NULL; | 445 | dentry = NULL; |
444 | while ((dentry = get_next_positive_subdir(dentry, root))) { | 446 | while ((dentry = get_next_positive_subdir(dentry, root))) { |
447 | int flags = how; | ||
448 | |||
445 | spin_lock(&sbi->fs_lock); | 449 | spin_lock(&sbi->fs_lock); |
446 | ino = autofs4_dentry_ino(dentry); | 450 | ino = autofs4_dentry_ino(dentry); |
447 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) | 451 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { |
448 | expired = NULL; | ||
449 | else | ||
450 | expired = should_expire(dentry, mnt, timeout, how); | ||
451 | if (!expired) { | ||
452 | spin_unlock(&sbi->fs_lock); | 452 | spin_unlock(&sbi->fs_lock); |
453 | continue; | 453 | continue; |
454 | } | 454 | } |
455 | spin_unlock(&sbi->fs_lock); | ||
456 | |||
457 | expired = should_expire(dentry, mnt, timeout, flags); | ||
458 | if (!expired) | ||
459 | continue; | ||
460 | |||
461 | spin_lock(&sbi->fs_lock); | ||
455 | ino = autofs4_dentry_ino(expired); | 462 | ino = autofs4_dentry_ino(expired); |
456 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; | 463 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; |
457 | spin_unlock(&sbi->fs_lock); | 464 | spin_unlock(&sbi->fs_lock); |
458 | synchronize_rcu(); | 465 | synchronize_rcu(); |
459 | spin_lock(&sbi->fs_lock); | ||
460 | if (should_expire(expired, mnt, timeout, how)) { | ||
461 | if (expired != dentry) | ||
462 | dput(dentry); | ||
463 | goto found; | ||
464 | } | ||
465 | 466 | ||
467 | /* Make sure a reference is not taken on found if | ||
468 | * things have changed. | ||
469 | */ | ||
470 | flags &= ~AUTOFS_EXP_LEAVES; | ||
471 | found = should_expire(expired, mnt, timeout, how); | ||
472 | if (!found || found != expired) | ||
473 | /* Something has changed, continue */ | ||
474 | goto next; | ||
475 | |||
476 | if (expired != dentry) | ||
477 | dput(dentry); | ||
478 | |||
479 | spin_lock(&sbi->fs_lock); | ||
480 | goto found; | ||
481 | next: | ||
482 | spin_lock(&sbi->fs_lock); | ||
466 | ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; | 483 | ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; |
484 | spin_unlock(&sbi->fs_lock); | ||
467 | if (expired != dentry) | 485 | if (expired != dentry) |
468 | dput(expired); | 486 | dput(expired); |
469 | spin_unlock(&sbi->fs_lock); | ||
470 | } | 487 | } |
471 | return NULL; | 488 | return NULL; |
472 | 489 | ||
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
483 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 500 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
484 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 501 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
485 | int status; | 502 | int status; |
503 | int state; | ||
486 | 504 | ||
487 | /* Block on any pending expire */ | 505 | /* Block on any pending expire */ |
488 | if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) | 506 | if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) |
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
490 | if (rcu_walk) | 508 | if (rcu_walk) |
491 | return -ECHILD; | 509 | return -ECHILD; |
492 | 510 | ||
511 | retry: | ||
493 | spin_lock(&sbi->fs_lock); | 512 | spin_lock(&sbi->fs_lock); |
494 | if (ino->flags & AUTOFS_INF_EXPIRING) { | 513 | state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING); |
514 | if (state == AUTOFS_INF_WANT_EXPIRE) { | ||
515 | spin_unlock(&sbi->fs_lock); | ||
516 | /* | ||
517 | * Possibly being selected for expire, wait until | ||
518 | * it's selected or not. | ||
519 | */ | ||
520 | schedule_timeout_uninterruptible(HZ/10); | ||
521 | goto retry; | ||
522 | } | ||
523 | if (state & AUTOFS_INF_EXPIRING) { | ||
495 | spin_unlock(&sbi->fs_lock); | 524 | spin_unlock(&sbi->fs_lock); |
496 | 525 | ||
497 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); | 526 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7f6aff3f72eb..e5495f37c6ed 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
853 | current->flags |= PF_RANDOMIZE; | 853 | current->flags |= PF_RANDOMIZE; |
854 | 854 | ||
855 | setup_new_exec(bprm); | 855 | setup_new_exec(bprm); |
856 | install_exec_creds(bprm); | ||
856 | 857 | ||
857 | /* Do this so that we can load the interpreter, if need be. We will | 858 | /* Do this so that we can load the interpreter, if need be. We will |
858 | change some of these later */ | 859 | change some of these later */ |
@@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
1044 | goto out; | 1045 | goto out; |
1045 | #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ | 1046 | #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ |
1046 | 1047 | ||
1047 | install_exec_creds(bprm); | ||
1048 | retval = create_elf_tables(bprm, &loc->elf_ex, | 1048 | retval = create_elf_tables(bprm, &loc->elf_ex, |
1049 | load_addr, interp_load_addr); | 1049 | load_addr, interp_load_addr); |
1050 | if (retval < 0) | 1050 | if (retval < 0) |
diff --git a/fs/block_dev.c b/fs/block_dev.c index c3cdde87cc8c..08ae99343d92 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) | |||
249 | * thaw_bdev drops it. | 249 | * thaw_bdev drops it. |
250 | */ | 250 | */ |
251 | sb = get_super(bdev); | 251 | sb = get_super(bdev); |
252 | drop_super(sb); | 252 | if (sb) |
253 | drop_super(sb); | ||
253 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 254 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
254 | return sb; | 255 | return sb; |
255 | } | 256 | } |
@@ -646,7 +647,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type, | |||
646 | { | 647 | { |
647 | struct dentry *dent; | 648 | struct dentry *dent; |
648 | dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); | 649 | dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); |
649 | if (dent) | 650 | if (!IS_ERR(dent)) |
650 | dent->d_sb->s_iflags |= SB_I_CGROUPWB; | 651 | dent->d_sb->s_iflags |= SB_I_CGROUPWB; |
651 | return dent; | 652 | return dent; |
652 | } | 653 | } |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2b88439c2ee8..455a6b2fd539 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode) | |||
589 | 589 | ||
590 | list_del(&ref2->list); | 590 | list_del(&ref2->list); |
591 | kmem_cache_free(btrfs_prelim_ref_cache, ref2); | 591 | kmem_cache_free(btrfs_prelim_ref_cache, ref2); |
592 | cond_resched(); | ||
592 | } | 593 | } |
593 | 594 | ||
594 | } | 595 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2fe8f89091a3..33fe03551105 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -427,6 +427,7 @@ struct btrfs_space_info { | |||
427 | struct list_head ro_bgs; | 427 | struct list_head ro_bgs; |
428 | struct list_head priority_tickets; | 428 | struct list_head priority_tickets; |
429 | struct list_head tickets; | 429 | struct list_head tickets; |
430 | u64 tickets_id; | ||
430 | 431 | ||
431 | struct rw_semaphore groups_sem; | 432 | struct rw_semaphore groups_sem; |
432 | /* for block groups in our same type */ | 433 | /* for block groups in our same type */ |
@@ -1028,6 +1029,7 @@ struct btrfs_fs_info { | |||
1028 | struct btrfs_workqueue *qgroup_rescan_workers; | 1029 | struct btrfs_workqueue *qgroup_rescan_workers; |
1029 | struct completion qgroup_rescan_completion; | 1030 | struct completion qgroup_rescan_completion; |
1030 | struct btrfs_work qgroup_rescan_work; | 1031 | struct btrfs_work qgroup_rescan_work; |
1032 | bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ | ||
1031 | 1033 | ||
1032 | /* filesystem state */ | 1034 | /* filesystem state */ |
1033 | unsigned long fs_state; | 1035 | unsigned long fs_state; |
@@ -1079,6 +1081,8 @@ struct btrfs_fs_info { | |||
1079 | struct list_head pinned_chunks; | 1081 | struct list_head pinned_chunks; |
1080 | 1082 | ||
1081 | int creating_free_space_tree; | 1083 | int creating_free_space_tree; |
1084 | /* Used to record internally whether fs has been frozen */ | ||
1085 | int fs_frozen; | ||
1082 | }; | 1086 | }; |
1083 | 1087 | ||
1084 | struct btrfs_subvolume_writers { | 1088 | struct btrfs_subvolume_writers { |
@@ -2578,7 +2582,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
2578 | struct btrfs_root *root, | 2582 | struct btrfs_root *root, |
2579 | u64 root_objectid, u64 owner, u64 offset, | 2583 | u64 root_objectid, u64 owner, u64 offset, |
2580 | struct btrfs_key *ins); | 2584 | struct btrfs_key *ins); |
2581 | int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, | 2585 | int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, |
2582 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, | 2586 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, |
2583 | struct btrfs_key *ins, int is_data, int delalloc); | 2587 | struct btrfs_key *ins, int is_data, int delalloc); |
2584 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2588 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index b6d210e7a993..ac02e041464b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
541 | struct btrfs_delayed_ref_head *existing; | 541 | struct btrfs_delayed_ref_head *existing; |
542 | struct btrfs_delayed_ref_head *head_ref = NULL; | 542 | struct btrfs_delayed_ref_head *head_ref = NULL; |
543 | struct btrfs_delayed_ref_root *delayed_refs; | 543 | struct btrfs_delayed_ref_root *delayed_refs; |
544 | struct btrfs_qgroup_extent_record *qexisting; | ||
545 | int count_mod = 1; | 544 | int count_mod = 1; |
546 | int must_insert_reserved = 0; | 545 | int must_insert_reserved = 0; |
547 | 546 | ||
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
606 | qrecord->num_bytes = num_bytes; | 605 | qrecord->num_bytes = num_bytes; |
607 | qrecord->old_roots = NULL; | 606 | qrecord->old_roots = NULL; |
608 | 607 | ||
609 | qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, | 608 | if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, |
610 | delayed_refs, | 609 | delayed_refs, qrecord)) |
611 | qrecord); | ||
612 | if (qexisting) | ||
613 | kfree(qrecord); | 610 | kfree(qrecord); |
614 | } | 611 | } |
615 | 612 | ||
@@ -862,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
862 | return 0; | 859 | return 0; |
863 | } | 860 | } |
864 | 861 | ||
865 | int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, | ||
866 | struct btrfs_trans_handle *trans, | ||
867 | u64 ref_root, u64 bytenr, u64 num_bytes) | ||
868 | { | ||
869 | struct btrfs_delayed_ref_root *delayed_refs; | ||
870 | struct btrfs_delayed_ref_head *ref_head; | ||
871 | int ret = 0; | ||
872 | |||
873 | if (!fs_info->quota_enabled || !is_fstree(ref_root)) | ||
874 | return 0; | ||
875 | |||
876 | delayed_refs = &trans->transaction->delayed_refs; | ||
877 | |||
878 | spin_lock(&delayed_refs->lock); | ||
879 | ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0); | ||
880 | if (!ref_head) { | ||
881 | ret = -ENOENT; | ||
882 | goto out; | ||
883 | } | ||
884 | WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root); | ||
885 | ref_head->qgroup_ref_root = ref_root; | ||
886 | ref_head->qgroup_reserved = num_bytes; | ||
887 | out: | ||
888 | spin_unlock(&delayed_refs->lock); | ||
889 | return ret; | ||
890 | } | ||
891 | |||
892 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 862 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
893 | struct btrfs_trans_handle *trans, | 863 | struct btrfs_trans_handle *trans, |
894 | u64 bytenr, u64 num_bytes, | 864 | u64 bytenr, u64 num_bytes, |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 5fca9534a271..43f3629760e9 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
250 | u64 parent, u64 ref_root, | 250 | u64 parent, u64 ref_root, |
251 | u64 owner, u64 offset, u64 reserved, int action, | 251 | u64 owner, u64 offset, u64 reserved, int action, |
252 | struct btrfs_delayed_extent_op *extent_op); | 252 | struct btrfs_delayed_extent_op *extent_op); |
253 | int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, | ||
254 | struct btrfs_trans_handle *trans, | ||
255 | u64 ref_root, u64 bytenr, u64 num_bytes); | ||
256 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 253 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
257 | struct btrfs_trans_handle *trans, | 254 | struct btrfs_trans_handle *trans, |
258 | u64 bytenr, u64 num_bytes, | 255 | u64 bytenr, u64 num_bytes, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 59febfb8d04a..54bc8c7c6bcd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
559 | u32 nritems = btrfs_header_nritems(leaf); | 559 | u32 nritems = btrfs_header_nritems(leaf); |
560 | int slot; | 560 | int slot; |
561 | 561 | ||
562 | if (nritems == 0) | 562 | if (nritems == 0) { |
563 | struct btrfs_root *check_root; | ||
564 | |||
565 | key.objectid = btrfs_header_owner(leaf); | ||
566 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
567 | key.offset = (u64)-1; | ||
568 | |||
569 | check_root = btrfs_get_fs_root(root->fs_info, &key, false); | ||
570 | /* | ||
571 | * The only reason we also check NULL here is that during | ||
572 | * open_ctree() some roots has not yet been set up. | ||
573 | */ | ||
574 | if (!IS_ERR_OR_NULL(check_root)) { | ||
575 | /* if leaf is the root, then it's fine */ | ||
576 | if (leaf->start != | ||
577 | btrfs_root_bytenr(&check_root->root_item)) { | ||
578 | CORRUPT("non-root leaf's nritems is 0", | ||
579 | leaf, root, 0); | ||
580 | return -EIO; | ||
581 | } | ||
582 | } | ||
563 | return 0; | 583 | return 0; |
584 | } | ||
564 | 585 | ||
565 | /* Check the 0 item */ | 586 | /* Check the 0 item */ |
566 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | 587 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != |
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
612 | return 0; | 633 | return 0; |
613 | } | 634 | } |
614 | 635 | ||
636 | static int check_node(struct btrfs_root *root, struct extent_buffer *node) | ||
637 | { | ||
638 | unsigned long nr = btrfs_header_nritems(node); | ||
639 | |||
640 | if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { | ||
641 | btrfs_crit(root->fs_info, | ||
642 | "corrupt node: block %llu root %llu nritems %lu", | ||
643 | node->start, root->objectid, nr); | ||
644 | return -EIO; | ||
645 | } | ||
646 | return 0; | ||
647 | } | ||
648 | |||
615 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | 649 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
616 | u64 phy_offset, struct page *page, | 650 | u64 phy_offset, struct page *page, |
617 | u64 start, u64 end, int mirror) | 651 | u64 start, u64 end, int mirror) |
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
682 | ret = -EIO; | 716 | ret = -EIO; |
683 | } | 717 | } |
684 | 718 | ||
719 | if (found_level > 0 && check_node(root, eb)) | ||
720 | ret = -EIO; | ||
721 | |||
685 | if (!ret) | 722 | if (!ret) |
686 | set_extent_buffer_uptodate(eb); | 723 | set_extent_buffer_uptodate(eb); |
687 | err: | 724 | err: |
@@ -1618,8 +1655,8 @@ fail: | |||
1618 | return ret; | 1655 | return ret; |
1619 | } | 1656 | } |
1620 | 1657 | ||
1621 | static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 1658 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
1622 | u64 root_id) | 1659 | u64 root_id) |
1623 | { | 1660 | { |
1624 | struct btrfs_root *root; | 1661 | struct btrfs_root *root; |
1625 | 1662 | ||
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) | |||
2298 | fs_info->quota_enabled = 0; | 2335 | fs_info->quota_enabled = 0; |
2299 | fs_info->pending_quota_state = 0; | 2336 | fs_info->pending_quota_state = 0; |
2300 | fs_info->qgroup_ulist = NULL; | 2337 | fs_info->qgroup_ulist = NULL; |
2338 | fs_info->qgroup_rescan_running = false; | ||
2301 | mutex_init(&fs_info->qgroup_rescan_lock); | 2339 | mutex_init(&fs_info->qgroup_rescan_lock); |
2302 | } | 2340 | } |
2303 | 2341 | ||
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb, | |||
2624 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2662 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2625 | atomic_set(&fs_info->reada_works_cnt, 0); | 2663 | atomic_set(&fs_info->reada_works_cnt, 0); |
2626 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2664 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2665 | fs_info->fs_frozen = 0; | ||
2627 | fs_info->sb = sb; | 2666 | fs_info->sb = sb; |
2628 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; | 2667 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
2629 | fs_info->metadata_ratio = 0; | 2668 | fs_info->metadata_ratio = 0; |
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
3739 | if (btrfs_root_refs(&root->root_item) == 0) | 3778 | if (btrfs_root_refs(&root->root_item) == 0) |
3740 | synchronize_srcu(&fs_info->subvol_srcu); | 3779 | synchronize_srcu(&fs_info->subvol_srcu); |
3741 | 3780 | ||
3742 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 3781 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
3743 | btrfs_free_log(NULL, root); | 3782 | btrfs_free_log(NULL, root); |
3783 | if (root->reloc_root) { | ||
3784 | free_extent_buffer(root->reloc_root->node); | ||
3785 | free_extent_buffer(root->reloc_root->commit_root); | ||
3786 | btrfs_put_fs_root(root->reloc_root); | ||
3787 | root->reloc_root = NULL; | ||
3788 | } | ||
3789 | } | ||
3744 | 3790 | ||
3745 | if (root->free_ino_pinned) | 3791 | if (root->free_ino_pinned) |
3746 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 3792 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root) | |||
3851 | smp_mb(); | 3897 | smp_mb(); |
3852 | 3898 | ||
3853 | /* wait for the qgroup rescan worker to stop */ | 3899 | /* wait for the qgroup rescan worker to stop */ |
3854 | btrfs_qgroup_wait_for_completion(fs_info); | 3900 | btrfs_qgroup_wait_for_completion(fs_info, false); |
3855 | 3901 | ||
3856 | /* wait for the uuid_scan task to finish */ | 3902 | /* wait for the uuid_scan task to finish */ |
3857 | down(&fs_info->uuid_tree_rescan_sem); | 3903 | down(&fs_info->uuid_tree_rescan_sem); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b3207a0e09f7..f19a982f5a4f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, | |||
68 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | 68 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
69 | struct btrfs_key *location); | 69 | struct btrfs_key *location); |
70 | int btrfs_init_fs_root(struct btrfs_root *root); | 70 | int btrfs_init_fs_root(struct btrfs_root *root); |
71 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
72 | u64 root_id); | ||
71 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | 73 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, |
72 | struct btrfs_root *root); | 74 | struct btrfs_root *root); |
73 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); | 75 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61b494e8e604..665da8f66ff1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -60,21 +60,6 @@ enum { | |||
60 | CHUNK_ALLOC_FORCE = 2, | 60 | CHUNK_ALLOC_FORCE = 2, |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* | ||
64 | * Control how reservations are dealt with. | ||
65 | * | ||
66 | * RESERVE_FREE - freeing a reservation. | ||
67 | * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for | ||
68 | * ENOSPC accounting | ||
69 | * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update | ||
70 | * bytes_may_use as the ENOSPC accounting is done elsewhere | ||
71 | */ | ||
72 | enum { | ||
73 | RESERVE_FREE = 0, | ||
74 | RESERVE_ALLOC = 1, | ||
75 | RESERVE_ALLOC_NO_ACCOUNT = 2, | ||
76 | }; | ||
77 | |||
78 | static int update_block_group(struct btrfs_trans_handle *trans, | 63 | static int update_block_group(struct btrfs_trans_handle *trans, |
79 | struct btrfs_root *root, u64 bytenr, | 64 | struct btrfs_root *root, u64 bytenr, |
80 | u64 num_bytes, int alloc); | 65 | u64 num_bytes, int alloc); |
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
104 | struct btrfs_key *key); | 89 | struct btrfs_key *key); |
105 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 90 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
106 | int dump_block_groups); | 91 | int dump_block_groups); |
107 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 92 | static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, |
108 | u64 num_bytes, int reserve, | 93 | u64 ram_bytes, u64 num_bytes, int delalloc); |
109 | int delalloc); | 94 | static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, |
95 | u64 num_bytes, int delalloc); | ||
110 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | 96 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
111 | u64 num_bytes); | 97 | u64 num_bytes); |
112 | int btrfs_pin_extent(struct btrfs_root *root, | 98 | int btrfs_pin_extent(struct btrfs_root *root, |
@@ -3501,7 +3487,6 @@ again: | |||
3501 | dcs = BTRFS_DC_SETUP; | 3487 | dcs = BTRFS_DC_SETUP; |
3502 | else if (ret == -ENOSPC) | 3488 | else if (ret == -ENOSPC) |
3503 | set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); | 3489 | set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); |
3504 | btrfs_free_reserved_data_space(inode, 0, num_pages); | ||
3505 | 3490 | ||
3506 | out_put: | 3491 | out_put: |
3507 | iput(inode); | 3492 | iput(inode); |
@@ -4286,13 +4271,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) | |||
4286 | if (ret < 0) | 4271 | if (ret < 0) |
4287 | return ret; | 4272 | return ret; |
4288 | 4273 | ||
4289 | /* | 4274 | /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ |
4290 | * Use new btrfs_qgroup_reserve_data to reserve precious data space | ||
4291 | * | ||
4292 | * TODO: Find a good method to avoid reserve data space for NOCOW | ||
4293 | * range, but don't impact performance on quota disable case. | ||
4294 | */ | ||
4295 | ret = btrfs_qgroup_reserve_data(inode, start, len); | 4275 | ret = btrfs_qgroup_reserve_data(inode, start, len); |
4276 | if (ret) | ||
4277 | btrfs_free_reserved_data_space_noquota(inode, start, len); | ||
4296 | return ret; | 4278 | return ret; |
4297 | } | 4279 | } |
4298 | 4280 | ||
@@ -4472,6 +4454,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans, | |||
4472 | } | 4454 | } |
4473 | } | 4455 | } |
4474 | 4456 | ||
4457 | /* | ||
4458 | * If force is CHUNK_ALLOC_FORCE: | ||
4459 | * - return 1 if it successfully allocates a chunk, | ||
4460 | * - return errors including -ENOSPC otherwise. | ||
4461 | * If force is NOT CHUNK_ALLOC_FORCE: | ||
4462 | * - return 0 if it doesn't need to allocate a new chunk, | ||
4463 | * - return 1 if it successfully allocates a chunk, | ||
4464 | * - return errors including -ENOSPC otherwise. | ||
4465 | */ | ||
4475 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 4466 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
4476 | struct btrfs_root *extent_root, u64 flags, int force) | 4467 | struct btrfs_root *extent_root, u64 flags, int force) |
4477 | { | 4468 | { |
@@ -4882,7 +4873,7 @@ static int flush_space(struct btrfs_root *root, | |||
4882 | btrfs_get_alloc_profile(root, 0), | 4873 | btrfs_get_alloc_profile(root, 0), |
4883 | CHUNK_ALLOC_NO_FORCE); | 4874 | CHUNK_ALLOC_NO_FORCE); |
4884 | btrfs_end_transaction(trans, root); | 4875 | btrfs_end_transaction(trans, root); |
4885 | if (ret == -ENOSPC) | 4876 | if (ret > 0 || ret == -ENOSPC) |
4886 | ret = 0; | 4877 | ret = 0; |
4887 | break; | 4878 | break; |
4888 | case COMMIT_TRANS: | 4879 | case COMMIT_TRANS: |
@@ -4907,11 +4898,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4907 | u64 expected; | 4898 | u64 expected; |
4908 | u64 to_reclaim = 0; | 4899 | u64 to_reclaim = 0; |
4909 | 4900 | ||
4910 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | ||
4911 | if (can_overcommit(root, space_info, to_reclaim, | ||
4912 | BTRFS_RESERVE_FLUSH_ALL)) | ||
4913 | return 0; | ||
4914 | |||
4915 | list_for_each_entry(ticket, &space_info->tickets, list) | 4901 | list_for_each_entry(ticket, &space_info->tickets, list) |
4916 | to_reclaim += ticket->bytes; | 4902 | to_reclaim += ticket->bytes; |
4917 | list_for_each_entry(ticket, &space_info->priority_tickets, list) | 4903 | list_for_each_entry(ticket, &space_info->priority_tickets, list) |
@@ -4919,6 +4905,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4919 | if (to_reclaim) | 4905 | if (to_reclaim) |
4920 | return to_reclaim; | 4906 | return to_reclaim; |
4921 | 4907 | ||
4908 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | ||
4909 | if (can_overcommit(root, space_info, to_reclaim, | ||
4910 | BTRFS_RESERVE_FLUSH_ALL)) | ||
4911 | return 0; | ||
4912 | |||
4922 | used = space_info->bytes_used + space_info->bytes_reserved + | 4913 | used = space_info->bytes_used + space_info->bytes_reserved + |
4923 | space_info->bytes_pinned + space_info->bytes_readonly + | 4914 | space_info->bytes_pinned + space_info->bytes_readonly + |
4924 | space_info->bytes_may_use; | 4915 | space_info->bytes_may_use; |
@@ -4972,12 +4963,12 @@ static void wake_all_tickets(struct list_head *head) | |||
4972 | */ | 4963 | */ |
4973 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | 4964 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) |
4974 | { | 4965 | { |
4975 | struct reserve_ticket *last_ticket = NULL; | ||
4976 | struct btrfs_fs_info *fs_info; | 4966 | struct btrfs_fs_info *fs_info; |
4977 | struct btrfs_space_info *space_info; | 4967 | struct btrfs_space_info *space_info; |
4978 | u64 to_reclaim; | 4968 | u64 to_reclaim; |
4979 | int flush_state; | 4969 | int flush_state; |
4980 | int commit_cycles = 0; | 4970 | int commit_cycles = 0; |
4971 | u64 last_tickets_id; | ||
4981 | 4972 | ||
4982 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | 4973 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); |
4983 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 4974 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
@@ -4990,8 +4981,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
4990 | spin_unlock(&space_info->lock); | 4981 | spin_unlock(&space_info->lock); |
4991 | return; | 4982 | return; |
4992 | } | 4983 | } |
4993 | last_ticket = list_first_entry(&space_info->tickets, | 4984 | last_tickets_id = space_info->tickets_id; |
4994 | struct reserve_ticket, list); | ||
4995 | spin_unlock(&space_info->lock); | 4985 | spin_unlock(&space_info->lock); |
4996 | 4986 | ||
4997 | flush_state = FLUSH_DELAYED_ITEMS_NR; | 4987 | flush_state = FLUSH_DELAYED_ITEMS_NR; |
@@ -5011,10 +5001,10 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
5011 | space_info); | 5001 | space_info); |
5012 | ticket = list_first_entry(&space_info->tickets, | 5002 | ticket = list_first_entry(&space_info->tickets, |
5013 | struct reserve_ticket, list); | 5003 | struct reserve_ticket, list); |
5014 | if (last_ticket == ticket) { | 5004 | if (last_tickets_id == space_info->tickets_id) { |
5015 | flush_state++; | 5005 | flush_state++; |
5016 | } else { | 5006 | } else { |
5017 | last_ticket = ticket; | 5007 | last_tickets_id = space_info->tickets_id; |
5018 | flush_state = FLUSH_DELAYED_ITEMS_NR; | 5008 | flush_state = FLUSH_DELAYED_ITEMS_NR; |
5019 | if (commit_cycles) | 5009 | if (commit_cycles) |
5020 | commit_cycles--; | 5010 | commit_cycles--; |
@@ -5390,6 +5380,7 @@ again: | |||
5390 | list_del_init(&ticket->list); | 5380 | list_del_init(&ticket->list); |
5391 | num_bytes -= ticket->bytes; | 5381 | num_bytes -= ticket->bytes; |
5392 | ticket->bytes = 0; | 5382 | ticket->bytes = 0; |
5383 | space_info->tickets_id++; | ||
5393 | wake_up(&ticket->wait); | 5384 | wake_up(&ticket->wait); |
5394 | } else { | 5385 | } else { |
5395 | ticket->bytes -= num_bytes; | 5386 | ticket->bytes -= num_bytes; |
@@ -5432,6 +5423,7 @@ again: | |||
5432 | num_bytes -= ticket->bytes; | 5423 | num_bytes -= ticket->bytes; |
5433 | space_info->bytes_may_use += ticket->bytes; | 5424 | space_info->bytes_may_use += ticket->bytes; |
5434 | ticket->bytes = 0; | 5425 | ticket->bytes = 0; |
5426 | space_info->tickets_id++; | ||
5435 | wake_up(&ticket->wait); | 5427 | wake_up(&ticket->wait); |
5436 | } else { | 5428 | } else { |
5437 | trace_btrfs_space_reservation(fs_info, "space_info", | 5429 | trace_btrfs_space_reservation(fs_info, "space_info", |
@@ -6497,19 +6489,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | |||
6497 | } | 6489 | } |
6498 | 6490 | ||
6499 | /** | 6491 | /** |
6500 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 6492 | * btrfs_add_reserved_bytes - update the block_group and space info counters |
6501 | * @cache: The cache we are manipulating | 6493 | * @cache: The cache we are manipulating |
6494 | * @ram_bytes: The number of bytes of file content; this is the same as | ||
6495 | * @num_bytes except in the compression path. | ||
6502 | * @num_bytes: The number of bytes in question | 6496 | * @num_bytes: The number of bytes in question |
6503 | * @reserve: One of the reservation enums | ||
6504 | * @delalloc: The blocks are allocated for the delalloc write | 6497 | * @delalloc: The blocks are allocated for the delalloc write |
6505 | * | 6498 | * |
6506 | * This is called by the allocator when it reserves space, or by somebody who is | 6499 | * This is called by the allocator when it reserves space. Metadata |
6507 | * freeing space that was never actually used on disk. For example if you | 6500 | * reservations should be called with RESERVE_ALLOC so we do the proper |
6508 | * reserve some space for a new leaf in transaction A and before transaction A | ||
6509 | * commits you free that leaf, you call this with reserve set to 0 in order to | ||
6510 | * clear the reservation. | ||
6511 | * | ||
6512 | * Metadata reservations should be called with RESERVE_ALLOC so we do the proper | ||
6513 | * ENOSPC accounting. For data we handle the reservation through clearing the | 6501 | * ENOSPC accounting. For data we handle the reservation through clearing the |
6514 | * delalloc bits in the io_tree. We have to do this since we could end up | 6502 | * delalloc bits in the io_tree. We have to do this since we could end up |
6515 | * allocating less disk space for the amount of data we have reserved in the | 6503 | * allocating less disk space for the amount of data we have reserved in the |
@@ -6519,44 +6507,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | |||
6519 | * make the reservation and return -EAGAIN, otherwise this function always | 6507 | * make the reservation and return -EAGAIN, otherwise this function always |
6520 | * succeeds. | 6508 | * succeeds. |
6521 | */ | 6509 | */ |
6522 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 6510 | static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, |
6523 | u64 num_bytes, int reserve, int delalloc) | 6511 | u64 ram_bytes, u64 num_bytes, int delalloc) |
6524 | { | 6512 | { |
6525 | struct btrfs_space_info *space_info = cache->space_info; | 6513 | struct btrfs_space_info *space_info = cache->space_info; |
6526 | int ret = 0; | 6514 | int ret = 0; |
6527 | 6515 | ||
6528 | spin_lock(&space_info->lock); | 6516 | spin_lock(&space_info->lock); |
6529 | spin_lock(&cache->lock); | 6517 | spin_lock(&cache->lock); |
6530 | if (reserve != RESERVE_FREE) { | 6518 | if (cache->ro) { |
6531 | if (cache->ro) { | 6519 | ret = -EAGAIN; |
6532 | ret = -EAGAIN; | ||
6533 | } else { | ||
6534 | cache->reserved += num_bytes; | ||
6535 | space_info->bytes_reserved += num_bytes; | ||
6536 | if (reserve == RESERVE_ALLOC) { | ||
6537 | trace_btrfs_space_reservation(cache->fs_info, | ||
6538 | "space_info", space_info->flags, | ||
6539 | num_bytes, 0); | ||
6540 | space_info->bytes_may_use -= num_bytes; | ||
6541 | } | ||
6542 | |||
6543 | if (delalloc) | ||
6544 | cache->delalloc_bytes += num_bytes; | ||
6545 | } | ||
6546 | } else { | 6520 | } else { |
6547 | if (cache->ro) | 6521 | cache->reserved += num_bytes; |
6548 | space_info->bytes_readonly += num_bytes; | 6522 | space_info->bytes_reserved += num_bytes; |
6549 | cache->reserved -= num_bytes; | ||
6550 | space_info->bytes_reserved -= num_bytes; | ||
6551 | 6523 | ||
6524 | trace_btrfs_space_reservation(cache->fs_info, | ||
6525 | "space_info", space_info->flags, | ||
6526 | ram_bytes, 0); | ||
6527 | space_info->bytes_may_use -= ram_bytes; | ||
6552 | if (delalloc) | 6528 | if (delalloc) |
6553 | cache->delalloc_bytes -= num_bytes; | 6529 | cache->delalloc_bytes += num_bytes; |
6554 | } | 6530 | } |
6555 | spin_unlock(&cache->lock); | 6531 | spin_unlock(&cache->lock); |
6556 | spin_unlock(&space_info->lock); | 6532 | spin_unlock(&space_info->lock); |
6557 | return ret; | 6533 | return ret; |
6558 | } | 6534 | } |
6559 | 6535 | ||
6536 | /** | ||
6537 | * btrfs_free_reserved_bytes - update the block_group and space info counters | ||
6538 | * @cache: The cache we are manipulating | ||
6539 | * @num_bytes: The number of bytes in question | ||
6540 | * @delalloc: The blocks are allocated for the delalloc write | ||
6541 | * | ||
6542 | * This is called by somebody who is freeing space that was never actually used | ||
6543 | * on disk. For example if you reserve some space for a new leaf in transaction | ||
6544 | * A and before transaction A commits you free that leaf, you call this to | ||
6545 | * clear the reservation. | ||
6546 | */ | ||
6547 | |||
6548 | static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
6549 | u64 num_bytes, int delalloc) | ||
6550 | { | ||
6551 | struct btrfs_space_info *space_info = cache->space_info; | ||
6552 | int ret = 0; | ||
6553 | |||
6554 | spin_lock(&space_info->lock); | ||
6555 | spin_lock(&cache->lock); | ||
6556 | if (cache->ro) | ||
6557 | space_info->bytes_readonly += num_bytes; | ||
6558 | cache->reserved -= num_bytes; | ||
6559 | space_info->bytes_reserved -= num_bytes; | ||
6560 | |||
6561 | if (delalloc) | ||
6562 | cache->delalloc_bytes -= num_bytes; | ||
6563 | spin_unlock(&cache->lock); | ||
6564 | spin_unlock(&space_info->lock); | ||
6565 | return ret; | ||
6566 | } | ||
6560 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 6567 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
6561 | struct btrfs_root *root) | 6568 | struct btrfs_root *root) |
6562 | { | 6569 | { |
@@ -7191,7 +7198,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7191 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 7198 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
7192 | 7199 | ||
7193 | btrfs_add_free_space(cache, buf->start, buf->len); | 7200 | btrfs_add_free_space(cache, buf->start, buf->len); |
7194 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); | 7201 | btrfs_free_reserved_bytes(cache, buf->len, 0); |
7195 | btrfs_put_block_group(cache); | 7202 | btrfs_put_block_group(cache); |
7196 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); | 7203 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); |
7197 | pin = 0; | 7204 | pin = 0; |
@@ -7416,9 +7423,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache, | |||
7416 | * the free space extent currently. | 7423 | * the free space extent currently. |
7417 | */ | 7424 | */ |
7418 | static noinline int find_free_extent(struct btrfs_root *orig_root, | 7425 | static noinline int find_free_extent(struct btrfs_root *orig_root, |
7419 | u64 num_bytes, u64 empty_size, | 7426 | u64 ram_bytes, u64 num_bytes, u64 empty_size, |
7420 | u64 hint_byte, struct btrfs_key *ins, | 7427 | u64 hint_byte, struct btrfs_key *ins, |
7421 | u64 flags, int delalloc) | 7428 | u64 flags, int delalloc) |
7422 | { | 7429 | { |
7423 | int ret = 0; | 7430 | int ret = 0; |
7424 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 7431 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
@@ -7430,8 +7437,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, | |||
7430 | struct btrfs_space_info *space_info; | 7437 | struct btrfs_space_info *space_info; |
7431 | int loop = 0; | 7438 | int loop = 0; |
7432 | int index = __get_raid_index(flags); | 7439 | int index = __get_raid_index(flags); |
7433 | int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? | ||
7434 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | ||
7435 | bool failed_cluster_refill = false; | 7440 | bool failed_cluster_refill = false; |
7436 | bool failed_alloc = false; | 7441 | bool failed_alloc = false; |
7437 | bool use_cluster = true; | 7442 | bool use_cluster = true; |
@@ -7763,8 +7768,8 @@ checks: | |||
7763 | search_start - offset); | 7768 | search_start - offset); |
7764 | BUG_ON(offset > search_start); | 7769 | BUG_ON(offset > search_start); |
7765 | 7770 | ||
7766 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 7771 | ret = btrfs_add_reserved_bytes(block_group, ram_bytes, |
7767 | alloc_type, delalloc); | 7772 | num_bytes, delalloc); |
7768 | if (ret == -EAGAIN) { | 7773 | if (ret == -EAGAIN) { |
7769 | btrfs_add_free_space(block_group, offset, num_bytes); | 7774 | btrfs_add_free_space(block_group, offset, num_bytes); |
7770 | goto loop; | 7775 | goto loop; |
@@ -7936,7 +7941,7 @@ again: | |||
7936 | up_read(&info->groups_sem); | 7941 | up_read(&info->groups_sem); |
7937 | } | 7942 | } |
7938 | 7943 | ||
7939 | int btrfs_reserve_extent(struct btrfs_root *root, | 7944 | int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, |
7940 | u64 num_bytes, u64 min_alloc_size, | 7945 | u64 num_bytes, u64 min_alloc_size, |
7941 | u64 empty_size, u64 hint_byte, | 7946 | u64 empty_size, u64 hint_byte, |
7942 | struct btrfs_key *ins, int is_data, int delalloc) | 7947 | struct btrfs_key *ins, int is_data, int delalloc) |
@@ -7948,8 +7953,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, | |||
7948 | flags = btrfs_get_alloc_profile(root, is_data); | 7953 | flags = btrfs_get_alloc_profile(root, is_data); |
7949 | again: | 7954 | again: |
7950 | WARN_ON(num_bytes < root->sectorsize); | 7955 | WARN_ON(num_bytes < root->sectorsize); |
7951 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, | 7956 | ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, |
7952 | flags, delalloc); | 7957 | hint_byte, ins, flags, delalloc); |
7953 | if (!ret && !is_data) { | 7958 | if (!ret && !is_data) { |
7954 | btrfs_dec_block_group_reservations(root->fs_info, | 7959 | btrfs_dec_block_group_reservations(root->fs_info, |
7955 | ins->objectid); | 7960 | ins->objectid); |
@@ -7958,6 +7963,7 @@ again: | |||
7958 | num_bytes = min(num_bytes >> 1, ins->offset); | 7963 | num_bytes = min(num_bytes >> 1, ins->offset); |
7959 | num_bytes = round_down(num_bytes, root->sectorsize); | 7964 | num_bytes = round_down(num_bytes, root->sectorsize); |
7960 | num_bytes = max(num_bytes, min_alloc_size); | 7965 | num_bytes = max(num_bytes, min_alloc_size); |
7966 | ram_bytes = num_bytes; | ||
7961 | if (num_bytes == min_alloc_size) | 7967 | if (num_bytes == min_alloc_size) |
7962 | final_tried = true; | 7968 | final_tried = true; |
7963 | goto again; | 7969 | goto again; |
@@ -7995,7 +8001,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
7995 | if (btrfs_test_opt(root->fs_info, DISCARD)) | 8001 | if (btrfs_test_opt(root->fs_info, DISCARD)) |
7996 | ret = btrfs_discard_extent(root, start, len, NULL); | 8002 | ret = btrfs_discard_extent(root, start, len, NULL); |
7997 | btrfs_add_free_space(cache, start, len); | 8003 | btrfs_add_free_space(cache, start, len); |
7998 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); | 8004 | btrfs_free_reserved_bytes(cache, len, delalloc); |
7999 | trace_btrfs_reserved_extent_free(root, start, len); | 8005 | trace_btrfs_reserved_extent_free(root, start, len); |
8000 | } | 8006 | } |
8001 | 8007 | ||
@@ -8208,6 +8214,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
8208 | { | 8214 | { |
8209 | int ret; | 8215 | int ret; |
8210 | struct btrfs_block_group_cache *block_group; | 8216 | struct btrfs_block_group_cache *block_group; |
8217 | struct btrfs_space_info *space_info; | ||
8211 | 8218 | ||
8212 | /* | 8219 | /* |
8213 | * Mixed block groups will exclude before processing the log so we only | 8220 | * Mixed block groups will exclude before processing the log so we only |
@@ -8223,9 +8230,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
8223 | if (!block_group) | 8230 | if (!block_group) |
8224 | return -EINVAL; | 8231 | return -EINVAL; |
8225 | 8232 | ||
8226 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 8233 | space_info = block_group->space_info; |
8227 | RESERVE_ALLOC_NO_ACCOUNT, 0); | 8234 | spin_lock(&space_info->lock); |
8228 | BUG_ON(ret); /* logic error */ | 8235 | spin_lock(&block_group->lock); |
8236 | space_info->bytes_reserved += ins->offset; | ||
8237 | block_group->reserved += ins->offset; | ||
8238 | spin_unlock(&block_group->lock); | ||
8239 | spin_unlock(&space_info->lock); | ||
8240 | |||
8229 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 8241 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
8230 | 0, owner, offset, ins, 1); | 8242 | 0, owner, offset, ins, 1); |
8231 | btrfs_put_block_group(block_group); | 8243 | btrfs_put_block_group(block_group); |
@@ -8368,7 +8380,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
8368 | if (IS_ERR(block_rsv)) | 8380 | if (IS_ERR(block_rsv)) |
8369 | return ERR_CAST(block_rsv); | 8381 | return ERR_CAST(block_rsv); |
8370 | 8382 | ||
8371 | ret = btrfs_reserve_extent(root, blocksize, blocksize, | 8383 | ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, |
8372 | empty_size, hint, &ins, 0, 0); | 8384 | empty_size, hint, &ins, 0, 0); |
8373 | if (ret) | 8385 | if (ret) |
8374 | goto out_unuse; | 8386 | goto out_unuse; |
@@ -8521,35 +8533,6 @@ reada: | |||
8521 | wc->reada_slot = slot; | 8533 | wc->reada_slot = slot; |
8522 | } | 8534 | } |
8523 | 8535 | ||
8524 | /* | ||
8525 | * These may not be seen by the usual inc/dec ref code so we have to | ||
8526 | * add them here. | ||
8527 | */ | ||
8528 | static int record_one_subtree_extent(struct btrfs_trans_handle *trans, | ||
8529 | struct btrfs_root *root, u64 bytenr, | ||
8530 | u64 num_bytes) | ||
8531 | { | ||
8532 | struct btrfs_qgroup_extent_record *qrecord; | ||
8533 | struct btrfs_delayed_ref_root *delayed_refs; | ||
8534 | |||
8535 | qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); | ||
8536 | if (!qrecord) | ||
8537 | return -ENOMEM; | ||
8538 | |||
8539 | qrecord->bytenr = bytenr; | ||
8540 | qrecord->num_bytes = num_bytes; | ||
8541 | qrecord->old_roots = NULL; | ||
8542 | |||
8543 | delayed_refs = &trans->transaction->delayed_refs; | ||
8544 | spin_lock(&delayed_refs->lock); | ||
8545 | if (btrfs_qgroup_insert_dirty_extent(trans->fs_info, | ||
8546 | delayed_refs, qrecord)) | ||
8547 | kfree(qrecord); | ||
8548 | spin_unlock(&delayed_refs->lock); | ||
8549 | |||
8550 | return 0; | ||
8551 | } | ||
8552 | |||
8553 | static int account_leaf_items(struct btrfs_trans_handle *trans, | 8536 | static int account_leaf_items(struct btrfs_trans_handle *trans, |
8554 | struct btrfs_root *root, | 8537 | struct btrfs_root *root, |
8555 | struct extent_buffer *eb) | 8538 | struct extent_buffer *eb) |
@@ -8583,7 +8566,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, | |||
8583 | 8566 | ||
8584 | num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); | 8567 | num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); |
8585 | 8568 | ||
8586 | ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); | 8569 | ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, |
8570 | bytenr, num_bytes, GFP_NOFS); | ||
8587 | if (ret) | 8571 | if (ret) |
8588 | return ret; | 8572 | return ret; |
8589 | } | 8573 | } |
@@ -8732,8 +8716,9 @@ walk_down: | |||
8732 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 8716 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
8733 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; | 8717 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; |
8734 | 8718 | ||
8735 | ret = record_one_subtree_extent(trans, root, child_bytenr, | 8719 | ret = btrfs_qgroup_insert_dirty_extent(trans, |
8736 | root->nodesize); | 8720 | root->fs_info, child_bytenr, |
8721 | root->nodesize, GFP_NOFS); | ||
8737 | if (ret) | 8722 | if (ret) |
8738 | goto out; | 8723 | goto out; |
8739 | } | 8724 | } |
@@ -9906,6 +9891,7 @@ static int find_first_block_group(struct btrfs_root *root, | |||
9906 | } else { | 9891 | } else { |
9907 | ret = 0; | 9892 | ret = 0; |
9908 | } | 9893 | } |
9894 | free_extent_map(em); | ||
9909 | goto out; | 9895 | goto out; |
9910 | } | 9896 | } |
9911 | path->slots[0]++; | 9897 | path->slots[0]++; |
@@ -9942,6 +9928,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | |||
9942 | block_group->iref = 0; | 9928 | block_group->iref = 0; |
9943 | block_group->inode = NULL; | 9929 | block_group->inode = NULL; |
9944 | spin_unlock(&block_group->lock); | 9930 | spin_unlock(&block_group->lock); |
9931 | ASSERT(block_group->io_ctl.inode == NULL); | ||
9945 | iput(inode); | 9932 | iput(inode); |
9946 | last = block_group->key.objectid + block_group->key.offset; | 9933 | last = block_group->key.objectid + block_group->key.offset; |
9947 | btrfs_put_block_group(block_group); | 9934 | btrfs_put_block_group(block_group); |
@@ -9999,6 +9986,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
9999 | free_excluded_extents(info->extent_root, block_group); | 9986 | free_excluded_extents(info->extent_root, block_group); |
10000 | 9987 | ||
10001 | btrfs_remove_free_space_cache(block_group); | 9988 | btrfs_remove_free_space_cache(block_group); |
9989 | ASSERT(list_empty(&block_group->dirty_list)); | ||
9990 | ASSERT(list_empty(&block_group->io_list)); | ||
9991 | ASSERT(list_empty(&block_group->bg_list)); | ||
9992 | ASSERT(atomic_read(&block_group->count) == 1); | ||
10002 | btrfs_put_block_group(block_group); | 9993 | btrfs_put_block_group(block_group); |
10003 | 9994 | ||
10004 | spin_lock(&info->block_group_cache_lock); | 9995 | spin_lock(&info->block_group_cache_lock); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bc2729a7612d..28cd88fccc7e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define EXTENT_DAMAGED (1U << 14) | 20 | #define EXTENT_DAMAGED (1U << 14) |
21 | #define EXTENT_NORESERVE (1U << 15) | 21 | #define EXTENT_NORESERVE (1U << 15) |
22 | #define EXTENT_QGROUP_RESERVED (1U << 16) | 22 | #define EXTENT_QGROUP_RESERVED (1U << 16) |
23 | #define EXTENT_CLEAR_DATA_RESV (1U << 17) | ||
23 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 24 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
24 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 25 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
25 | 26 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9404121fd5f7..fea31a4a6e36 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
2033 | */ | 2033 | */ |
2034 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 2034 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
2035 | &BTRFS_I(inode)->runtime_flags); | 2035 | &BTRFS_I(inode)->runtime_flags); |
2036 | /* | ||
2037 | * An ordered extent might have started before and completed | ||
2038 | * already with io errors, in which case the inode was not | ||
2039 | * updated and we end up here. So check the inode's mapping | ||
2040 | * flags for any errors that might have happened while doing | ||
2041 | * writeback of file data. | ||
2042 | */ | ||
2043 | ret = btrfs_inode_check_errors(inode); | ||
2036 | inode_unlock(inode); | 2044 | inode_unlock(inode); |
2037 | goto out; | 2045 | goto out; |
2038 | } | 2046 | } |
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
2062 | } | 2070 | } |
2063 | trans->sync = true; | 2071 | trans->sync = true; |
2064 | 2072 | ||
2065 | btrfs_init_log_ctx(&ctx); | 2073 | btrfs_init_log_ctx(&ctx, inode); |
2066 | 2074 | ||
2067 | ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); | 2075 | ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); |
2068 | if (ret < 0) { | 2076 | if (ret < 0) { |
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2667 | 2675 | ||
2668 | alloc_start = round_down(offset, blocksize); | 2676 | alloc_start = round_down(offset, blocksize); |
2669 | alloc_end = round_up(offset + len, blocksize); | 2677 | alloc_end = round_up(offset + len, blocksize); |
2678 | cur_offset = alloc_start; | ||
2670 | 2679 | ||
2671 | /* Make sure we aren't being give some crap mode */ | 2680 | /* Make sure we aren't being give some crap mode */ |
2672 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 2681 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2759 | 2768 | ||
2760 | /* First, check if we exceed the qgroup limit */ | 2769 | /* First, check if we exceed the qgroup limit */ |
2761 | INIT_LIST_HEAD(&reserve_list); | 2770 | INIT_LIST_HEAD(&reserve_list); |
2762 | cur_offset = alloc_start; | ||
2763 | while (1) { | 2771 | while (1) { |
2764 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 2772 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
2765 | alloc_end - cur_offset, 0); | 2773 | alloc_end - cur_offset, 0); |
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2786 | last_byte - cur_offset); | 2794 | last_byte - cur_offset); |
2787 | if (ret < 0) | 2795 | if (ret < 0) |
2788 | break; | 2796 | break; |
2797 | } else { | ||
2798 | /* | ||
2799 | * No need to reserve an unwritten extent for this | ||
2800 | * range; free the reserved data space first, otherwise | ||
2801 | * we would get a false ENOSPC error. | ||
2802 | */ | ||
2803 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
2804 | last_byte - cur_offset); | ||
2789 | } | 2805 | } |
2790 | free_extent_map(em); | 2806 | free_extent_map(em); |
2791 | cur_offset = last_byte; | 2807 | cur_offset = last_byte; |
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2803 | range->start, | 2819 | range->start, |
2804 | range->len, 1 << inode->i_blkbits, | 2820 | range->len, 1 << inode->i_blkbits, |
2805 | offset + len, &alloc_hint); | 2821 | offset + len, &alloc_hint); |
2822 | else | ||
2823 | btrfs_free_reserved_data_space(inode, range->start, | ||
2824 | range->len); | ||
2806 | list_del(&range->list); | 2825 | list_del(&range->list); |
2807 | kfree(range); | 2826 | kfree(range); |
2808 | } | 2827 | } |
@@ -2837,18 +2856,11 @@ out_unlock: | |||
2837 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 2856 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
2838 | &cached_state, GFP_KERNEL); | 2857 | &cached_state, GFP_KERNEL); |
2839 | out: | 2858 | out: |
2840 | /* | ||
2841 | * As we waited the extent range, the data_rsv_map must be empty | ||
2842 | * in the range, as written data range will be released from it. | ||
2843 | * And for prealloacted extent, it will also be released when | ||
2844 | * its metadata is written. | ||
2845 | * So this is completely used as cleanup. | ||
2846 | */ | ||
2847 | btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); | ||
2848 | inode_unlock(inode); | 2859 | inode_unlock(inode); |
2849 | /* Let go of our reservation. */ | 2860 | /* Let go of our reservation. */ |
2850 | btrfs_free_reserved_data_space(inode, alloc_start, | 2861 | if (ret != 0) |
2851 | alloc_end - alloc_start); | 2862 | btrfs_free_reserved_data_space(inode, alloc_start, |
2863 | alloc_end - cur_offset); | ||
2852 | return ret; | 2864 | return ret; |
2853 | } | 2865 | } |
2854 | 2866 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index aa6fabaee72e..359ee861b5a4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -495,10 +495,9 @@ again: | |||
495 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | 495 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
496 | prealloc, prealloc, &alloc_hint); | 496 | prealloc, prealloc, &alloc_hint); |
497 | if (ret) { | 497 | if (ret) { |
498 | btrfs_delalloc_release_space(inode, 0, prealloc); | 498 | btrfs_delalloc_release_metadata(inode, prealloc); |
499 | goto out_put; | 499 | goto out_put; |
500 | } | 500 | } |
501 | btrfs_free_reserved_data_space(inode, 0, prealloc); | ||
502 | 501 | ||
503 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); | 502 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); |
504 | out_put: | 503 | out_put: |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2f5975954ccf..e6811c42e41e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -566,6 +566,8 @@ cont: | |||
566 | PAGE_SET_WRITEBACK | | 566 | PAGE_SET_WRITEBACK | |
567 | page_error_op | | 567 | page_error_op | |
568 | PAGE_END_WRITEBACK); | 568 | PAGE_END_WRITEBACK); |
569 | btrfs_free_reserved_data_space_noquota(inode, start, | ||
570 | end - start + 1); | ||
569 | goto free_pages_out; | 571 | goto free_pages_out; |
570 | } | 572 | } |
571 | } | 573 | } |
@@ -742,7 +744,7 @@ retry: | |||
742 | lock_extent(io_tree, async_extent->start, | 744 | lock_extent(io_tree, async_extent->start, |
743 | async_extent->start + async_extent->ram_size - 1); | 745 | async_extent->start + async_extent->ram_size - 1); |
744 | 746 | ||
745 | ret = btrfs_reserve_extent(root, | 747 | ret = btrfs_reserve_extent(root, async_extent->ram_size, |
746 | async_extent->compressed_size, | 748 | async_extent->compressed_size, |
747 | async_extent->compressed_size, | 749 | async_extent->compressed_size, |
748 | 0, alloc_hint, &ins, 1, 1); | 750 | 0, alloc_hint, &ins, 1, 1); |
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
969 | EXTENT_DEFRAG, PAGE_UNLOCK | | 971 | EXTENT_DEFRAG, PAGE_UNLOCK | |
970 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | | 972 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | |
971 | PAGE_END_WRITEBACK); | 973 | PAGE_END_WRITEBACK); |
972 | 974 | btrfs_free_reserved_data_space_noquota(inode, start, | |
975 | end - start + 1); | ||
973 | *nr_written = *nr_written + | 976 | *nr_written = *nr_written + |
974 | (end - start + PAGE_SIZE) / PAGE_SIZE; | 977 | (end - start + PAGE_SIZE) / PAGE_SIZE; |
975 | *page_started = 1; | 978 | *page_started = 1; |
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
989 | unsigned long op; | 992 | unsigned long op; |
990 | 993 | ||
991 | cur_alloc_size = disk_num_bytes; | 994 | cur_alloc_size = disk_num_bytes; |
992 | ret = btrfs_reserve_extent(root, cur_alloc_size, | 995 | ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, |
993 | root->sectorsize, 0, alloc_hint, | 996 | root->sectorsize, 0, alloc_hint, |
994 | &ins, 1, 1); | 997 | &ins, 1, 1); |
995 | if (ret < 0) | 998 | if (ret < 0) |
@@ -1489,8 +1492,10 @@ out_check: | |||
1489 | extent_clear_unlock_delalloc(inode, cur_offset, | 1492 | extent_clear_unlock_delalloc(inode, cur_offset, |
1490 | cur_offset + num_bytes - 1, | 1493 | cur_offset + num_bytes - 1, |
1491 | locked_page, EXTENT_LOCKED | | 1494 | locked_page, EXTENT_LOCKED | |
1492 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1495 | EXTENT_DELALLOC | |
1493 | PAGE_SET_PRIVATE2); | 1496 | EXTENT_CLEAR_DATA_RESV, |
1497 | PAGE_UNLOCK | PAGE_SET_PRIVATE2); | ||
1498 | |||
1494 | if (!nolock && nocow) | 1499 | if (!nolock && nocow) |
1495 | btrfs_end_write_no_snapshoting(root); | 1500 | btrfs_end_write_no_snapshoting(root); |
1496 | cur_offset = extent_end; | 1501 | cur_offset = extent_end; |
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1807 | return; | 1812 | return; |
1808 | 1813 | ||
1809 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | 1814 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
1810 | && do_list && !(state->state & EXTENT_NORESERVE)) | 1815 | && do_list && !(state->state & EXTENT_NORESERVE) |
1816 | && (*bits & (EXTENT_DO_ACCOUNTING | | ||
1817 | EXTENT_CLEAR_DATA_RESV))) | ||
1811 | btrfs_free_reserved_data_space_noquota(inode, | 1818 | btrfs_free_reserved_data_space_noquota(inode, |
1812 | state->start, len); | 1819 | state->start, len); |
1813 | 1820 | ||
@@ -3435,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3435 | found_key.offset = 0; | 3442 | found_key.offset = 0; |
3436 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 3443 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
3437 | ret = PTR_ERR_OR_ZERO(inode); | 3444 | ret = PTR_ERR_OR_ZERO(inode); |
3438 | if (ret && ret != -ESTALE) | 3445 | if (ret && ret != -ENOENT) |
3439 | goto out; | 3446 | goto out; |
3440 | 3447 | ||
3441 | if (ret == -ESTALE && root == root->fs_info->tree_root) { | 3448 | if (ret == -ENOENT && root == root->fs_info->tree_root) { |
3442 | struct btrfs_root *dead_root; | 3449 | struct btrfs_root *dead_root; |
3443 | struct btrfs_fs_info *fs_info = root->fs_info; | 3450 | struct btrfs_fs_info *fs_info = root->fs_info; |
3444 | int is_dead_root = 0; | 3451 | int is_dead_root = 0; |
@@ -3474,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3474 | * Inode is already gone but the orphan item is still there, | 3481 | * Inode is already gone but the orphan item is still there, |
3475 | * kill the orphan item. | 3482 | * kill the orphan item. |
3476 | */ | 3483 | */ |
3477 | if (ret == -ESTALE) { | 3484 | if (ret == -ENOENT) { |
3478 | trans = btrfs_start_transaction(root, 1); | 3485 | trans = btrfs_start_transaction(root, 1); |
3479 | if (IS_ERR(trans)) { | 3486 | if (IS_ERR(trans)) { |
3480 | ret = PTR_ERR(trans); | 3487 | ret = PTR_ERR(trans); |
@@ -3633,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
3633 | /* | 3640 | /* |
3634 | * read an inode from the btree into the in-memory inode | 3641 | * read an inode from the btree into the in-memory inode |
3635 | */ | 3642 | */ |
3636 | static void btrfs_read_locked_inode(struct inode *inode) | 3643 | static int btrfs_read_locked_inode(struct inode *inode) |
3637 | { | 3644 | { |
3638 | struct btrfs_path *path; | 3645 | struct btrfs_path *path; |
3639 | struct extent_buffer *leaf; | 3646 | struct extent_buffer *leaf; |
@@ -3652,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
3652 | filled = true; | 3659 | filled = true; |
3653 | 3660 | ||
3654 | path = btrfs_alloc_path(); | 3661 | path = btrfs_alloc_path(); |
3655 | if (!path) | 3662 | if (!path) { |
3663 | ret = -ENOMEM; | ||
3656 | goto make_bad; | 3664 | goto make_bad; |
3665 | } | ||
3657 | 3666 | ||
3658 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 3667 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
3659 | 3668 | ||
3660 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 3669 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
3661 | if (ret) | 3670 | if (ret) { |
3671 | if (ret > 0) | ||
3672 | ret = -ENOENT; | ||
3662 | goto make_bad; | 3673 | goto make_bad; |
3674 | } | ||
3663 | 3675 | ||
3664 | leaf = path->nodes[0]; | 3676 | leaf = path->nodes[0]; |
3665 | 3677 | ||
@@ -3812,11 +3824,12 @@ cache_acl: | |||
3812 | } | 3824 | } |
3813 | 3825 | ||
3814 | btrfs_update_iflags(inode); | 3826 | btrfs_update_iflags(inode); |
3815 | return; | 3827 | return 0; |
3816 | 3828 | ||
3817 | make_bad: | 3829 | make_bad: |
3818 | btrfs_free_path(path); | 3830 | btrfs_free_path(path); |
3819 | make_bad_inode(inode); | 3831 | make_bad_inode(inode); |
3832 | return ret; | ||
3820 | } | 3833 | } |
3821 | 3834 | ||
3822 | /* | 3835 | /* |
@@ -4204,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4204 | int err = 0; | 4217 | int err = 0; |
4205 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4218 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4206 | struct btrfs_trans_handle *trans; | 4219 | struct btrfs_trans_handle *trans; |
4220 | u64 last_unlink_trans; | ||
4207 | 4221 | ||
4208 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 4222 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
4209 | return -ENOTEMPTY; | 4223 | return -ENOTEMPTY; |
@@ -4226,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4226 | if (err) | 4240 | if (err) |
4227 | goto out; | 4241 | goto out; |
4228 | 4242 | ||
4243 | last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; | ||
4244 | |||
4229 | /* now the directory is empty */ | 4245 | /* now the directory is empty */ |
4230 | err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), | 4246 | err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), |
4231 | dentry->d_name.name, dentry->d_name.len); | 4247 | dentry->d_name.name, dentry->d_name.len); |
4232 | if (!err) | 4248 | if (!err) { |
4233 | btrfs_i_size_write(inode, 0); | 4249 | btrfs_i_size_write(inode, 0); |
4250 | /* | ||
4251 | * Propagate the last_unlink_trans value of the deleted dir to | ||
4252 | * its parent directory. This is to prevent an unrecoverable | ||
4253 | * log tree in the case we do something like this: | ||
4254 | * 1) create dir foo | ||
4255 | * 2) create snapshot under dir foo | ||
4256 | * 3) delete the snapshot | ||
4257 | * 4) rmdir foo | ||
4258 | * 5) mkdir foo | ||
4259 | * 6) fsync foo or some file inside foo | ||
4260 | */ | ||
4261 | if (last_unlink_trans >= trans->transid) | ||
4262 | BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; | ||
4263 | } | ||
4234 | out: | 4264 | out: |
4235 | btrfs_end_transaction(trans, root); | 4265 | btrfs_end_transaction(trans, root); |
4236 | btrfs_btree_balance_dirty(root); | 4266 | btrfs_btree_balance_dirty(root); |
@@ -5606,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
5606 | return ERR_PTR(-ENOMEM); | 5636 | return ERR_PTR(-ENOMEM); |
5607 | 5637 | ||
5608 | if (inode->i_state & I_NEW) { | 5638 | if (inode->i_state & I_NEW) { |
5609 | btrfs_read_locked_inode(inode); | 5639 | int ret; |
5640 | |||
5641 | ret = btrfs_read_locked_inode(inode); | ||
5610 | if (!is_bad_inode(inode)) { | 5642 | if (!is_bad_inode(inode)) { |
5611 | inode_tree_add(inode); | 5643 | inode_tree_add(inode); |
5612 | unlock_new_inode(inode); | 5644 | unlock_new_inode(inode); |
@@ -5615,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
5615 | } else { | 5647 | } else { |
5616 | unlock_new_inode(inode); | 5648 | unlock_new_inode(inode); |
5617 | iput(inode); | 5649 | iput(inode); |
5618 | inode = ERR_PTR(-ESTALE); | 5650 | ASSERT(ret < 0); |
5651 | inode = ERR_PTR(ret < 0 ? ret : -ESTALE); | ||
5619 | } | 5652 | } |
5620 | } | 5653 | } |
5621 | 5654 | ||
@@ -7225,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7225 | int ret; | 7258 | int ret; |
7226 | 7259 | ||
7227 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 7260 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
7228 | ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, | 7261 | ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, |
7229 | alloc_hint, &ins, 1, 1); | 7262 | alloc_hint, &ins, 1, 1); |
7230 | if (ret) | 7263 | if (ret) |
7231 | return ERR_PTR(ret); | 7264 | return ERR_PTR(ret); |
@@ -7725,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7725 | ret = PTR_ERR(em2); | 7758 | ret = PTR_ERR(em2); |
7726 | goto unlock_err; | 7759 | goto unlock_err; |
7727 | } | 7760 | } |
7761 | /* | ||
7762 | * For inode marked NODATACOW or extent marked PREALLOC, | ||
7763 | * use the existing or preallocated extent, so does not | ||
7764 | * need to adjust btrfs_space_info's bytes_may_use. | ||
7765 | */ | ||
7766 | btrfs_free_reserved_data_space_noquota(inode, | ||
7767 | start, len); | ||
7728 | goto unlock; | 7768 | goto unlock; |
7729 | } | 7769 | } |
7730 | } | 7770 | } |
@@ -7759,7 +7799,6 @@ unlock: | |||
7759 | i_size_write(inode, start + len); | 7799 | i_size_write(inode, start + len); |
7760 | 7800 | ||
7761 | adjust_dio_outstanding_extents(inode, dio_data, len); | 7801 | adjust_dio_outstanding_extents(inode, dio_data, len); |
7762 | btrfs_free_reserved_data_space(inode, start, len); | ||
7763 | WARN_ON(dio_data->reserve < len); | 7802 | WARN_ON(dio_data->reserve < len); |
7764 | dio_data->reserve -= len; | 7803 | dio_data->reserve -= len; |
7765 | dio_data->unsubmitted_oe_range_end = start + len; | 7804 | dio_data->unsubmitted_oe_range_end = start + len; |
@@ -10280,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10280 | u64 last_alloc = (u64)-1; | 10319 | u64 last_alloc = (u64)-1; |
10281 | int ret = 0; | 10320 | int ret = 0; |
10282 | bool own_trans = true; | 10321 | bool own_trans = true; |
10322 | u64 end = start + num_bytes - 1; | ||
10283 | 10323 | ||
10284 | if (trans) | 10324 | if (trans) |
10285 | own_trans = false; | 10325 | own_trans = false; |
@@ -10301,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10301 | * sized chunks. | 10341 | * sized chunks. |
10302 | */ | 10342 | */ |
10303 | cur_bytes = min(cur_bytes, last_alloc); | 10343 | cur_bytes = min(cur_bytes, last_alloc); |
10304 | ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, | 10344 | ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, |
10305 | *alloc_hint, &ins, 1, 0); | 10345 | min_size, 0, *alloc_hint, &ins, 1, 0); |
10306 | if (ret) { | 10346 | if (ret) { |
10307 | if (own_trans) | 10347 | if (own_trans) |
10308 | btrfs_end_transaction(trans, root); | 10348 | btrfs_end_transaction(trans, root); |
@@ -10388,6 +10428,9 @@ next: | |||
10388 | if (own_trans) | 10428 | if (own_trans) |
10389 | btrfs_end_transaction(trans, root); | 10429 | btrfs_end_transaction(trans, root); |
10390 | } | 10430 | } |
10431 | if (cur_offset < end) | ||
10432 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
10433 | end - cur_offset + 1); | ||
10391 | return ret; | 10434 | return ret; |
10392 | } | 10435 | } |
10393 | 10436 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14ed1e9e6bc8..7fd939bfbd99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1634 | int namelen; | 1634 | int namelen; |
1635 | int ret = 0; | 1635 | int ret = 0; |
1636 | 1636 | ||
1637 | if (!S_ISDIR(file_inode(file)->i_mode)) | ||
1638 | return -ENOTDIR; | ||
1639 | |||
1637 | ret = mnt_want_write_file(file); | 1640 | ret = mnt_want_write_file(file); |
1638 | if (ret) | 1641 | if (ret) |
1639 | goto out; | 1642 | goto out; |
@@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
1691 | struct btrfs_ioctl_vol_args *vol_args; | 1694 | struct btrfs_ioctl_vol_args *vol_args; |
1692 | int ret; | 1695 | int ret; |
1693 | 1696 | ||
1697 | if (!S_ISDIR(file_inode(file)->i_mode)) | ||
1698 | return -ENOTDIR; | ||
1699 | |||
1694 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1700 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1695 | if (IS_ERR(vol_args)) | 1701 | if (IS_ERR(vol_args)) |
1696 | return PTR_ERR(vol_args); | 1702 | return PTR_ERR(vol_args); |
@@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1714 | bool readonly = false; | 1720 | bool readonly = false; |
1715 | struct btrfs_qgroup_inherit *inherit = NULL; | 1721 | struct btrfs_qgroup_inherit *inherit = NULL; |
1716 | 1722 | ||
1723 | if (!S_ISDIR(file_inode(file)->i_mode)) | ||
1724 | return -ENOTDIR; | ||
1725 | |||
1717 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1726 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1718 | if (IS_ERR(vol_args)) | 1727 | if (IS_ERR(vol_args)) |
1719 | return PTR_ERR(vol_args); | 1728 | return PTR_ERR(vol_args); |
@@ -2357,6 +2366,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2357 | int ret; | 2366 | int ret; |
2358 | int err = 0; | 2367 | int err = 0; |
2359 | 2368 | ||
2369 | if (!S_ISDIR(dir->i_mode)) | ||
2370 | return -ENOTDIR; | ||
2371 | |||
2360 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2372 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
2361 | if (IS_ERR(vol_args)) | 2373 | if (IS_ERR(vol_args)) |
2362 | return PTR_ERR(vol_args); | 2374 | return PTR_ERR(vol_args); |
@@ -5084,7 +5096,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
5084 | if (!capable(CAP_SYS_ADMIN)) | 5096 | if (!capable(CAP_SYS_ADMIN)) |
5085 | return -EPERM; | 5097 | return -EPERM; |
5086 | 5098 | ||
5087 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 5099 | return btrfs_qgroup_wait_for_completion(root->fs_info, true); |
5088 | } | 5100 | } |
5089 | 5101 | ||
5090 | static long _btrfs_ioctl_set_received_subvol(struct file *file, | 5102 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93ee1c18ef9d..8db2e29fdcf4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
995 | goto out; | 995 | goto out; |
996 | fs_info->quota_enabled = 0; | 996 | fs_info->quota_enabled = 0; |
997 | fs_info->pending_quota_state = 0; | 997 | fs_info->pending_quota_state = 0; |
998 | btrfs_qgroup_wait_for_completion(fs_info); | 998 | btrfs_qgroup_wait_for_completion(fs_info, false); |
999 | spin_lock(&fs_info->qgroup_lock); | 999 | spin_lock(&fs_info->qgroup_lock); |
1000 | quota_root = fs_info->quota_root; | 1000 | quota_root = fs_info->quota_root; |
1001 | fs_info->quota_root = NULL; | 1001 | fs_info->quota_root = NULL; |
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | |||
1453 | return ret; | 1453 | return ret; |
1454 | } | 1454 | } |
1455 | 1455 | ||
1456 | struct btrfs_qgroup_extent_record * | 1456 | int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, |
1457 | btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | 1457 | struct btrfs_delayed_ref_root *delayed_refs, |
1458 | struct btrfs_delayed_ref_root *delayed_refs, | 1458 | struct btrfs_qgroup_extent_record *record) |
1459 | struct btrfs_qgroup_extent_record *record) | ||
1460 | { | 1459 | { |
1461 | struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; | 1460 | struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; |
1462 | struct rb_node *parent_node = NULL; | 1461 | struct rb_node *parent_node = NULL; |
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | |||
1475 | else if (bytenr > entry->bytenr) | 1474 | else if (bytenr > entry->bytenr) |
1476 | p = &(*p)->rb_right; | 1475 | p = &(*p)->rb_right; |
1477 | else | 1476 | else |
1478 | return entry; | 1477 | return 1; |
1479 | } | 1478 | } |
1480 | 1479 | ||
1481 | rb_link_node(&record->node, parent_node, p); | 1480 | rb_link_node(&record->node, parent_node, p); |
1482 | rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); | 1481 | rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); |
1483 | return NULL; | 1482 | return 0; |
1483 | } | ||
1484 | |||
1485 | int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, | ||
1486 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, | ||
1487 | gfp_t gfp_flag) | ||
1488 | { | ||
1489 | struct btrfs_qgroup_extent_record *record; | ||
1490 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1491 | int ret; | ||
1492 | |||
1493 | if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0) | ||
1494 | return 0; | ||
1495 | if (WARN_ON(trans == NULL)) | ||
1496 | return -EINVAL; | ||
1497 | record = kmalloc(sizeof(*record), gfp_flag); | ||
1498 | if (!record) | ||
1499 | return -ENOMEM; | ||
1500 | |||
1501 | delayed_refs = &trans->transaction->delayed_refs; | ||
1502 | record->bytenr = bytenr; | ||
1503 | record->num_bytes = num_bytes; | ||
1504 | record->old_roots = NULL; | ||
1505 | |||
1506 | spin_lock(&delayed_refs->lock); | ||
1507 | ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, | ||
1508 | record); | ||
1509 | spin_unlock(&delayed_refs->lock); | ||
1510 | if (ret > 0) | ||
1511 | kfree(record); | ||
1512 | return 0; | ||
1484 | } | 1513 | } |
1485 | 1514 | ||
1486 | #define UPDATE_NEW 0 | 1515 | #define UPDATE_NEW 0 |
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2303 | int err = -ENOMEM; | 2332 | int err = -ENOMEM; |
2304 | int ret = 0; | 2333 | int ret = 0; |
2305 | 2334 | ||
2335 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2336 | fs_info->qgroup_rescan_running = true; | ||
2337 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2338 | |||
2306 | path = btrfs_alloc_path(); | 2339 | path = btrfs_alloc_path(); |
2307 | if (!path) | 2340 | if (!path) |
2308 | goto out; | 2341 | goto out; |
@@ -2369,6 +2402,9 @@ out: | |||
2369 | } | 2402 | } |
2370 | 2403 | ||
2371 | done: | 2404 | done: |
2405 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2406 | fs_info->qgroup_rescan_running = false; | ||
2407 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2372 | complete_all(&fs_info->qgroup_rescan_completion); | 2408 | complete_all(&fs_info->qgroup_rescan_completion); |
2373 | } | 2409 | } |
2374 | 2410 | ||
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
2487 | return 0; | 2523 | return 0; |
2488 | } | 2524 | } |
2489 | 2525 | ||
2490 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) | 2526 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
2527 | bool interruptible) | ||
2491 | { | 2528 | { |
2492 | int running; | 2529 | int running; |
2493 | int ret = 0; | 2530 | int ret = 0; |
2494 | 2531 | ||
2495 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2532 | mutex_lock(&fs_info->qgroup_rescan_lock); |
2496 | spin_lock(&fs_info->qgroup_lock); | 2533 | spin_lock(&fs_info->qgroup_lock); |
2497 | running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2534 | running = fs_info->qgroup_rescan_running; |
2498 | spin_unlock(&fs_info->qgroup_lock); | 2535 | spin_unlock(&fs_info->qgroup_lock); |
2499 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2536 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
2500 | 2537 | ||
2501 | if (running) | 2538 | if (!running) |
2539 | return 0; | ||
2540 | |||
2541 | if (interruptible) | ||
2502 | ret = wait_for_completion_interruptible( | 2542 | ret = wait_for_completion_interruptible( |
2503 | &fs_info->qgroup_rescan_completion); | 2543 | &fs_info->qgroup_rescan_completion); |
2544 | else | ||
2545 | wait_for_completion(&fs_info->qgroup_rescan_completion); | ||
2504 | 2546 | ||
2505 | return ret; | 2547 | return ret; |
2506 | } | 2548 | } |
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 710887c06aaf..1bc64c864b62 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h | |||
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
46 | struct btrfs_fs_info *fs_info); | 46 | struct btrfs_fs_info *fs_info); |
47 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | 47 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
48 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | 48 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); |
49 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | 49 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
50 | bool interruptible); | ||
50 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 51 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
51 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | 52 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
52 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 53 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | |||
63 | struct btrfs_delayed_extent_op; | 64 | struct btrfs_delayed_extent_op; |
64 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | 65 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, |
65 | struct btrfs_fs_info *fs_info); | 66 | struct btrfs_fs_info *fs_info); |
66 | struct btrfs_qgroup_extent_record * | 67 | /* |
67 | btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | 68 | * Insert one dirty extent record into @delayed_refs, informing qgroup to |
68 | struct btrfs_delayed_ref_root *delayed_refs, | 69 | * account that extent at commit trans time. |
69 | struct btrfs_qgroup_extent_record *record); | 70 | * |
71 | * No lock version, caller must acquire delayed ref lock and allocate memory. | ||
72 | * | ||
73 | * Return 0 for success insert | ||
74 | * Return >0 for existing record, caller can free @record safely. | ||
75 | * Error is not possible | ||
76 | */ | ||
77 | int btrfs_qgroup_insert_dirty_extent_nolock( | ||
78 | struct btrfs_fs_info *fs_info, | ||
79 | struct btrfs_delayed_ref_root *delayed_refs, | ||
80 | struct btrfs_qgroup_extent_record *record); | ||
81 | |||
82 | /* | ||
83 | * Insert one dirty extent record into @delayed_refs, informing qgroup to | ||
84 | * account that extent at commit trans time. | ||
85 | * | ||
86 | * Better encapsulated version. | ||
87 | * | ||
88 | * Return 0 if the operation is done. | ||
89 | * Return <0 for error, like memory allocation failure or invalid parameter | ||
90 | * (NULL trans) | ||
91 | */ | ||
92 | int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, | ||
93 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, | ||
94 | gfp_t gfp_flag); | ||
95 | |||
70 | int | 96 | int |
71 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, | 97 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
72 | struct btrfs_fs_info *fs_info, | 98 | struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b26a5aea41b4..c0c13dc6fe12 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "async-thread.h" | 31 | #include "async-thread.h" |
32 | #include "free-space-cache.h" | 32 | #include "free-space-cache.h" |
33 | #include "inode-map.h" | 33 | #include "inode-map.h" |
34 | #include "qgroup.h" | ||
34 | 35 | ||
35 | /* | 36 | /* |
36 | * backref_node, mapping_node and tree_block start with this | 37 | * backref_node, mapping_node and tree_block start with this |
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3037 | u64 num_bytes; | 3038 | u64 num_bytes; |
3038 | int nr = 0; | 3039 | int nr = 0; |
3039 | int ret = 0; | 3040 | int ret = 0; |
3041 | u64 prealloc_start = cluster->start - offset; | ||
3042 | u64 prealloc_end = cluster->end - offset; | ||
3043 | u64 cur_offset; | ||
3040 | 3044 | ||
3041 | BUG_ON(cluster->start != cluster->boundary[0]); | 3045 | BUG_ON(cluster->start != cluster->boundary[0]); |
3042 | inode_lock(inode); | 3046 | inode_lock(inode); |
3043 | 3047 | ||
3044 | ret = btrfs_check_data_free_space(inode, cluster->start, | 3048 | ret = btrfs_check_data_free_space(inode, prealloc_start, |
3045 | cluster->end + 1 - cluster->start); | 3049 | prealloc_end + 1 - prealloc_start); |
3046 | if (ret) | 3050 | if (ret) |
3047 | goto out; | 3051 | goto out; |
3048 | 3052 | ||
3053 | cur_offset = prealloc_start; | ||
3049 | while (nr < cluster->nr) { | 3054 | while (nr < cluster->nr) { |
3050 | start = cluster->boundary[nr] - offset; | 3055 | start = cluster->boundary[nr] - offset; |
3051 | if (nr + 1 < cluster->nr) | 3056 | if (nr + 1 < cluster->nr) |
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3055 | 3060 | ||
3056 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); | 3061 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); |
3057 | num_bytes = end + 1 - start; | 3062 | num_bytes = end + 1 - start; |
3063 | if (cur_offset < start) | ||
3064 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
3065 | start - cur_offset); | ||
3058 | ret = btrfs_prealloc_file_range(inode, 0, start, | 3066 | ret = btrfs_prealloc_file_range(inode, 0, start, |
3059 | num_bytes, num_bytes, | 3067 | num_bytes, num_bytes, |
3060 | end + 1, &alloc_hint); | 3068 | end + 1, &alloc_hint); |
3069 | cur_offset = end + 1; | ||
3061 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end); | 3070 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end); |
3062 | if (ret) | 3071 | if (ret) |
3063 | break; | 3072 | break; |
3064 | nr++; | 3073 | nr++; |
3065 | } | 3074 | } |
3066 | btrfs_free_reserved_data_space(inode, cluster->start, | 3075 | if (cur_offset < prealloc_end) |
3067 | cluster->end + 1 - cluster->start); | 3076 | btrfs_free_reserved_data_space(inode, cur_offset, |
3077 | prealloc_end + 1 - cur_offset); | ||
3068 | out: | 3078 | out: |
3069 | inode_unlock(inode); | 3079 | inode_unlock(inode); |
3070 | return ret; | 3080 | return ret; |
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3916 | return 0; | 3926 | return 0; |
3917 | } | 3927 | } |
3918 | 3928 | ||
3929 | /* | ||
3930 | * Qgroup fixer for data chunk relocation. | ||
3931 | * The data relocation is done in the following steps | ||
3932 | * 1) Copy data extents into data reloc tree | ||
3933 | * 2) Create tree reloc tree(special snapshot) for related subvolumes | ||
3934 | * 3) Modify file extents in tree reloc tree | ||
3935 | * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks | ||
3936 | * | ||
3937 | * The problem is, data and tree reloc tree are not accounted to qgroup, | ||
3938 | * and 4) will only info qgroup to track tree blocks change, not file extents | ||
3939 | * in the tree blocks. | ||
3940 | * | ||
3941 | * The good news is, related data extents are all in data reloc tree, so we | ||
3942 | * only need to info qgroup to track all file extents in data reloc tree | ||
3943 | * before commit trans. | ||
3944 | */ | ||
3945 | static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, | ||
3946 | struct reloc_control *rc) | ||
3947 | { | ||
3948 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | ||
3949 | struct inode *inode = rc->data_inode; | ||
3950 | struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; | ||
3951 | struct btrfs_path *path; | ||
3952 | struct btrfs_key key; | ||
3953 | int ret = 0; | ||
3954 | |||
3955 | if (!fs_info->quota_enabled) | ||
3956 | return 0; | ||
3957 | |||
3958 | /* | ||
3959 | * Only for stage where we update data pointers the qgroup fix is | ||
3960 | * valid. | ||
3961 | * For MOVING_DATA stage, we will miss the timing of swapping tree | ||
3962 | * blocks, and won't fix it. | ||
3963 | */ | ||
3964 | if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) | ||
3965 | return 0; | ||
3966 | |||
3967 | path = btrfs_alloc_path(); | ||
3968 | if (!path) | ||
3969 | return -ENOMEM; | ||
3970 | key.objectid = btrfs_ino(inode); | ||
3971 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3972 | key.offset = 0; | ||
3973 | |||
3974 | ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); | ||
3975 | if (ret < 0) | ||
3976 | goto out; | ||
3977 | |||
3978 | lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); | ||
3979 | while (1) { | ||
3980 | struct btrfs_file_extent_item *fi; | ||
3981 | |||
3982 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
3983 | if (key.objectid > btrfs_ino(inode)) | ||
3984 | break; | ||
3985 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
3986 | goto next; | ||
3987 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3988 | struct btrfs_file_extent_item); | ||
3989 | if (btrfs_file_extent_type(path->nodes[0], fi) != | ||
3990 | BTRFS_FILE_EXTENT_REG) | ||
3991 | goto next; | ||
3992 | ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, | ||
3993 | btrfs_file_extent_disk_bytenr(path->nodes[0], fi), | ||
3994 | btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), | ||
3995 | GFP_NOFS); | ||
3996 | if (ret < 0) | ||
3997 | break; | ||
3998 | next: | ||
3999 | ret = btrfs_next_item(data_reloc_root, path); | ||
4000 | if (ret < 0) | ||
4001 | break; | ||
4002 | if (ret > 0) { | ||
4003 | ret = 0; | ||
4004 | break; | ||
4005 | } | ||
4006 | } | ||
4007 | unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); | ||
4008 | out: | ||
4009 | btrfs_free_path(path); | ||
4010 | return ret; | ||
4011 | } | ||
4012 | |||
3919 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 4013 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3920 | { | 4014 | { |
3921 | struct rb_root blocks = RB_ROOT; | 4015 | struct rb_root blocks = RB_ROOT; |
@@ -4102,10 +4196,18 @@ restart: | |||
4102 | 4196 | ||
4103 | /* get rid of pinned extents */ | 4197 | /* get rid of pinned extents */ |
4104 | trans = btrfs_join_transaction(rc->extent_root); | 4198 | trans = btrfs_join_transaction(rc->extent_root); |
4105 | if (IS_ERR(trans)) | 4199 | if (IS_ERR(trans)) { |
4106 | err = PTR_ERR(trans); | 4200 | err = PTR_ERR(trans); |
4107 | else | 4201 | goto out_free; |
4108 | btrfs_commit_transaction(trans, rc->extent_root); | 4202 | } |
4203 | ret = qgroup_fix_relocated_data_extents(trans, rc); | ||
4204 | if (ret < 0) { | ||
4205 | btrfs_abort_transaction(trans, ret); | ||
4206 | if (!err) | ||
4207 | err = ret; | ||
4208 | goto out_free; | ||
4209 | } | ||
4210 | btrfs_commit_transaction(trans, rc->extent_root); | ||
4109 | out_free: | 4211 | out_free: |
4110 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | 4212 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); |
4111 | btrfs_free_path(path); | 4213 | btrfs_free_path(path); |
@@ -4468,10 +4570,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4468 | unset_reloc_control(rc); | 4570 | unset_reloc_control(rc); |
4469 | 4571 | ||
4470 | trans = btrfs_join_transaction(rc->extent_root); | 4572 | trans = btrfs_join_transaction(rc->extent_root); |
4471 | if (IS_ERR(trans)) | 4573 | if (IS_ERR(trans)) { |
4472 | err = PTR_ERR(trans); | 4574 | err = PTR_ERR(trans); |
4473 | else | 4575 | goto out_free; |
4474 | err = btrfs_commit_transaction(trans, rc->extent_root); | 4576 | } |
4577 | err = qgroup_fix_relocated_data_extents(trans, rc); | ||
4578 | if (err < 0) { | ||
4579 | btrfs_abort_transaction(trans, err); | ||
4580 | goto out_free; | ||
4581 | } | ||
4582 | err = btrfs_commit_transaction(trans, rc->extent_root); | ||
4475 | out_free: | 4583 | out_free: |
4476 | kfree(rc); | 4584 | kfree(rc); |
4477 | out: | 4585 | out: |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7fd7e1830cfe..091296062456 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
272 | root_key.objectid = key.offset; | 272 | root_key.objectid = key.offset; |
273 | key.offset++; | 273 | key.offset++; |
274 | 274 | ||
275 | /* | ||
276 | * The root might have been inserted already, as before we look | ||
277 | * for orphan roots, log replay might have happened, which | ||
278 | * triggers a transaction commit and qgroup accounting, which | ||
279 | * in turn reads and inserts fs roots while doing backref | ||
280 | * walking. | ||
281 | */ | ||
282 | root = btrfs_lookup_fs_root(tree_root->fs_info, | ||
283 | root_key.objectid); | ||
284 | if (root) { | ||
285 | WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, | ||
286 | &root->state)); | ||
287 | if (btrfs_root_refs(&root->root_item) == 0) | ||
288 | btrfs_add_dead_root(root); | ||
289 | continue; | ||
290 | } | ||
291 | |||
275 | root = btrfs_read_fs_root(tree_root, &root_key); | 292 | root = btrfs_read_fs_root(tree_root, &root_key); |
276 | err = PTR_ERR_OR_ZERO(root); | 293 | err = PTR_ERR_OR_ZERO(root); |
277 | if (err && err != -ENOENT) { | 294 | if (err && err != -ENOENT) { |
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
310 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); | 327 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
311 | 328 | ||
312 | err = btrfs_insert_fs_root(root->fs_info, root); | 329 | err = btrfs_insert_fs_root(root->fs_info, root); |
313 | /* | ||
314 | * The root might have been inserted already, as before we look | ||
315 | * for orphan roots, log replay might have happened, which | ||
316 | * triggers a transaction commit and qgroup accounting, which | ||
317 | * in turn reads and inserts fs roots while doing backref | ||
318 | * walking. | ||
319 | */ | ||
320 | if (err == -EEXIST) | ||
321 | err = 0; | ||
322 | if (err) { | 330 | if (err) { |
331 | BUG_ON(err == -EEXIST); | ||
323 | btrfs_free_fs_root(root); | 332 | btrfs_free_fs_root(root); |
324 | break; | 333 | break; |
325 | } | 334 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b71dd298385c..a87675ffd02b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -231,7 +231,6 @@ struct pending_dir_move { | |||
231 | u64 parent_ino; | 231 | u64 parent_ino; |
232 | u64 ino; | 232 | u64 ino; |
233 | u64 gen; | 233 | u64 gen; |
234 | bool is_orphan; | ||
235 | struct list_head update_refs; | 234 | struct list_head update_refs; |
236 | }; | 235 | }; |
237 | 236 | ||
@@ -274,6 +273,39 @@ struct name_cache_entry { | |||
274 | char name[]; | 273 | char name[]; |
275 | }; | 274 | }; |
276 | 275 | ||
276 | static void inconsistent_snapshot_error(struct send_ctx *sctx, | ||
277 | enum btrfs_compare_tree_result result, | ||
278 | const char *what) | ||
279 | { | ||
280 | const char *result_string; | ||
281 | |||
282 | switch (result) { | ||
283 | case BTRFS_COMPARE_TREE_NEW: | ||
284 | result_string = "new"; | ||
285 | break; | ||
286 | case BTRFS_COMPARE_TREE_DELETED: | ||
287 | result_string = "deleted"; | ||
288 | break; | ||
289 | case BTRFS_COMPARE_TREE_CHANGED: | ||
290 | result_string = "updated"; | ||
291 | break; | ||
292 | case BTRFS_COMPARE_TREE_SAME: | ||
293 | ASSERT(0); | ||
294 | result_string = "unchanged"; | ||
295 | break; | ||
296 | default: | ||
297 | ASSERT(0); | ||
298 | result_string = "unexpected"; | ||
299 | } | ||
300 | |||
301 | btrfs_err(sctx->send_root->fs_info, | ||
302 | "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu", | ||
303 | result_string, what, sctx->cmp_key->objectid, | ||
304 | sctx->send_root->root_key.objectid, | ||
305 | (sctx->parent_root ? | ||
306 | sctx->parent_root->root_key.objectid : 0)); | ||
307 | } | ||
308 | |||
277 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 309 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
278 | 310 | ||
279 | static struct waiting_dir_move * | 311 | static struct waiting_dir_move * |
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
1861 | * was already unlinked/moved, so we can safely assume that we will not | 1893 | * was already unlinked/moved, so we can safely assume that we will not |
1862 | * overwrite anything at this point in time. | 1894 | * overwrite anything at this point in time. |
1863 | */ | 1895 | */ |
1864 | if (other_inode > sctx->send_progress) { | 1896 | if (other_inode > sctx->send_progress || |
1897 | is_waiting_for_move(sctx, other_inode)) { | ||
1865 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | 1898 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, |
1866 | who_gen, NULL, NULL, NULL, NULL); | 1899 | who_gen, NULL, NULL, NULL, NULL); |
1867 | if (ret < 0) | 1900 | if (ret < 0) |
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
2502 | key.type = BTRFS_INODE_ITEM_KEY; | 2535 | key.type = BTRFS_INODE_ITEM_KEY; |
2503 | key.offset = 0; | 2536 | key.offset = 0; |
2504 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | 2537 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); |
2538 | if (ret > 0) | ||
2539 | ret = -ENOENT; | ||
2505 | if (ret < 0) | 2540 | if (ret < 0) |
2506 | goto out; | 2541 | goto out; |
2507 | 2542 | ||
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
2947 | } | 2982 | } |
2948 | 2983 | ||
2949 | if (loc.objectid > send_progress) { | 2984 | if (loc.objectid > send_progress) { |
2985 | struct orphan_dir_info *odi; | ||
2986 | |||
2987 | odi = get_orphan_dir_info(sctx, dir); | ||
2988 | free_orphan_dir_info(sctx, odi); | ||
2950 | ret = 0; | 2989 | ret = 0; |
2951 | goto out; | 2990 | goto out; |
2952 | } | 2991 | } |
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
3047 | pm->parent_ino = parent_ino; | 3086 | pm->parent_ino = parent_ino; |
3048 | pm->ino = ino; | 3087 | pm->ino = ino; |
3049 | pm->gen = ino_gen; | 3088 | pm->gen = ino_gen; |
3050 | pm->is_orphan = is_orphan; | ||
3051 | INIT_LIST_HEAD(&pm->list); | 3089 | INIT_LIST_HEAD(&pm->list); |
3052 | INIT_LIST_HEAD(&pm->update_refs); | 3090 | INIT_LIST_HEAD(&pm->update_refs); |
3053 | RB_CLEAR_NODE(&pm->node); | 3091 | RB_CLEAR_NODE(&pm->node); |
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, | |||
3113 | return NULL; | 3151 | return NULL; |
3114 | } | 3152 | } |
3115 | 3153 | ||
3154 | static int path_loop(struct send_ctx *sctx, struct fs_path *name, | ||
3155 | u64 ino, u64 gen, u64 *ancestor_ino) | ||
3156 | { | ||
3157 | int ret = 0; | ||
3158 | u64 parent_inode = 0; | ||
3159 | u64 parent_gen = 0; | ||
3160 | u64 start_ino = ino; | ||
3161 | |||
3162 | *ancestor_ino = 0; | ||
3163 | while (ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
3164 | fs_path_reset(name); | ||
3165 | |||
3166 | if (is_waiting_for_rm(sctx, ino)) | ||
3167 | break; | ||
3168 | if (is_waiting_for_move(sctx, ino)) { | ||
3169 | if (*ancestor_ino == 0) | ||
3170 | *ancestor_ino = ino; | ||
3171 | ret = get_first_ref(sctx->parent_root, ino, | ||
3172 | &parent_inode, &parent_gen, name); | ||
3173 | } else { | ||
3174 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
3175 | &parent_inode, | ||
3176 | &parent_gen, name); | ||
3177 | if (ret > 0) { | ||
3178 | ret = 0; | ||
3179 | break; | ||
3180 | } | ||
3181 | } | ||
3182 | if (ret < 0) | ||
3183 | break; | ||
3184 | if (parent_inode == start_ino) { | ||
3185 | ret = 1; | ||
3186 | if (*ancestor_ino == 0) | ||
3187 | *ancestor_ino = ino; | ||
3188 | break; | ||
3189 | } | ||
3190 | ino = parent_inode; | ||
3191 | gen = parent_gen; | ||
3192 | } | ||
3193 | return ret; | ||
3194 | } | ||
3195 | |||
3116 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | 3196 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) |
3117 | { | 3197 | { |
3118 | struct fs_path *from_path = NULL; | 3198 | struct fs_path *from_path = NULL; |
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3123 | u64 parent_ino, parent_gen; | 3203 | u64 parent_ino, parent_gen; |
3124 | struct waiting_dir_move *dm = NULL; | 3204 | struct waiting_dir_move *dm = NULL; |
3125 | u64 rmdir_ino = 0; | 3205 | u64 rmdir_ino = 0; |
3206 | u64 ancestor; | ||
3207 | bool is_orphan; | ||
3126 | int ret; | 3208 | int ret; |
3127 | 3209 | ||
3128 | name = fs_path_alloc(); | 3210 | name = fs_path_alloc(); |
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3135 | dm = get_waiting_dir_move(sctx, pm->ino); | 3217 | dm = get_waiting_dir_move(sctx, pm->ino); |
3136 | ASSERT(dm); | 3218 | ASSERT(dm); |
3137 | rmdir_ino = dm->rmdir_ino; | 3219 | rmdir_ino = dm->rmdir_ino; |
3220 | is_orphan = dm->orphanized; | ||
3138 | free_waiting_dir_move(sctx, dm); | 3221 | free_waiting_dir_move(sctx, dm); |
3139 | 3222 | ||
3140 | if (pm->is_orphan) { | 3223 | if (is_orphan) { |
3141 | ret = gen_unique_name(sctx, pm->ino, | 3224 | ret = gen_unique_name(sctx, pm->ino, |
3142 | pm->gen, from_path); | 3225 | pm->gen, from_path); |
3143 | } else { | 3226 | } else { |
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3155 | goto out; | 3238 | goto out; |
3156 | 3239 | ||
3157 | sctx->send_progress = sctx->cur_ino + 1; | 3240 | sctx->send_progress = sctx->cur_ino + 1; |
3241 | ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); | ||
3242 | if (ret < 0) | ||
3243 | goto out; | ||
3244 | if (ret) { | ||
3245 | LIST_HEAD(deleted_refs); | ||
3246 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); | ||
3247 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, | ||
3248 | &pm->update_refs, &deleted_refs, | ||
3249 | is_orphan); | ||
3250 | if (ret < 0) | ||
3251 | goto out; | ||
3252 | if (rmdir_ino) { | ||
3253 | dm = get_waiting_dir_move(sctx, pm->ino); | ||
3254 | ASSERT(dm); | ||
3255 | dm->rmdir_ino = rmdir_ino; | ||
3256 | } | ||
3257 | goto out; | ||
3258 | } | ||
3158 | fs_path_reset(name); | 3259 | fs_path_reset(name); |
3159 | to_path = name; | 3260 | to_path = name; |
3160 | name = NULL; | 3261 | name = NULL; |
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3174 | /* already deleted */ | 3275 | /* already deleted */ |
3175 | goto finish; | 3276 | goto finish; |
3176 | } | 3277 | } |
3177 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | 3278 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino); |
3178 | if (ret < 0) | 3279 | if (ret < 0) |
3179 | goto out; | 3280 | goto out; |
3180 | if (!ret) | 3281 | if (!ret) |
@@ -3204,8 +3305,18 @@ finish: | |||
3204 | * and old parent(s). | 3305 | * and old parent(s). |
3205 | */ | 3306 | */ |
3206 | list_for_each_entry(cur, &pm->update_refs, list) { | 3307 | list_for_each_entry(cur, &pm->update_refs, list) { |
3207 | if (cur->dir == rmdir_ino) | 3308 | /* |
3309 | * The parent inode might have been deleted in the send snapshot | ||
3310 | */ | ||
3311 | ret = get_inode_info(sctx->send_root, cur->dir, NULL, | ||
3312 | NULL, NULL, NULL, NULL, NULL); | ||
3313 | if (ret == -ENOENT) { | ||
3314 | ret = 0; | ||
3208 | continue; | 3315 | continue; |
3316 | } | ||
3317 | if (ret < 0) | ||
3318 | goto out; | ||
3319 | |||
3209 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3320 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
3210 | if (ret < 0) | 3321 | if (ret < 0) |
3211 | goto out; | 3322 | goto out; |
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, | |||
3325 | u64 left_gen; | 3436 | u64 left_gen; |
3326 | u64 right_gen; | 3437 | u64 right_gen; |
3327 | int ret = 0; | 3438 | int ret = 0; |
3439 | struct waiting_dir_move *wdm; | ||
3328 | 3440 | ||
3329 | if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) | 3441 | if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) |
3330 | return 0; | 3442 | return 0; |
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, | |||
3383 | goto out; | 3495 | goto out; |
3384 | } | 3496 | } |
3385 | 3497 | ||
3386 | if (is_waiting_for_move(sctx, di_key.objectid)) { | 3498 | wdm = get_waiting_dir_move(sctx, di_key.objectid); |
3499 | if (wdm && !wdm->orphanized) { | ||
3387 | ret = add_pending_dir_move(sctx, | 3500 | ret = add_pending_dir_move(sctx, |
3388 | sctx->cur_ino, | 3501 | sctx->cur_ino, |
3389 | sctx->cur_inode_gen, | 3502 | sctx->cur_inode_gen, |
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3470 | ret = is_ancestor(sctx->parent_root, | 3583 | ret = is_ancestor(sctx->parent_root, |
3471 | sctx->cur_ino, sctx->cur_inode_gen, | 3584 | sctx->cur_ino, sctx->cur_inode_gen, |
3472 | ino, path_before); | 3585 | ino, path_before); |
3473 | break; | 3586 | if (ret) |
3587 | break; | ||
3474 | } | 3588 | } |
3475 | 3589 | ||
3476 | fs_path_reset(path_before); | 3590 | fs_path_reset(path_before); |
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3643 | goto out; | 3757 | goto out; |
3644 | if (ret) { | 3758 | if (ret) { |
3645 | struct name_cache_entry *nce; | 3759 | struct name_cache_entry *nce; |
3760 | struct waiting_dir_move *wdm; | ||
3646 | 3761 | ||
3647 | ret = orphanize_inode(sctx, ow_inode, ow_gen, | 3762 | ret = orphanize_inode(sctx, ow_inode, ow_gen, |
3648 | cur->full_path); | 3763 | cur->full_path); |
3649 | if (ret < 0) | 3764 | if (ret < 0) |
3650 | goto out; | 3765 | goto out; |
3766 | |||
3767 | /* | ||
3768 | * If ow_inode has its rename operation delayed, | ||
3769 | * make sure that its orphanized name is used in | ||
3770 | * the source path when performing its rename | ||
3771 | * operation. | ||
3772 | */ | ||
3773 | if (is_waiting_for_move(sctx, ow_inode)) { | ||
3774 | wdm = get_waiting_dir_move(sctx, | ||
3775 | ow_inode); | ||
3776 | ASSERT(wdm); | ||
3777 | wdm->orphanized = true; | ||
3778 | } | ||
3779 | |||
3651 | /* | 3780 | /* |
3652 | * Make sure we clear our orphanized inode's | 3781 | * Make sure we clear our orphanized inode's |
3653 | * name from the name cache. This is because the | 3782 | * name from the name cache. This is because the |
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3663 | name_cache_delete(sctx, nce); | 3792 | name_cache_delete(sctx, nce); |
3664 | kfree(nce); | 3793 | kfree(nce); |
3665 | } | 3794 | } |
3795 | |||
3796 | /* | ||
3797 | * ow_inode might currently be an ancestor of | ||
3798 | * cur_ino, therefore compute valid_path (the | ||
3799 | * current path of cur_ino) again because it | ||
3800 | * might contain the pre-orphanization name of | ||
3801 | * ow_inode, which is no longer valid. | ||
3802 | */ | ||
3803 | fs_path_reset(valid_path); | ||
3804 | ret = get_cur_path(sctx, sctx->cur_ino, | ||
3805 | sctx->cur_inode_gen, valid_path); | ||
3806 | if (ret < 0) | ||
3807 | goto out; | ||
3666 | } else { | 3808 | } else { |
3667 | ret = send_unlink(sctx, cur->full_path); | 3809 | ret = send_unlink(sctx, cur->full_path); |
3668 | if (ret < 0) | 3810 | if (ret < 0) |
@@ -4126,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx, | |||
4126 | } | 4268 | } |
4127 | btrfs_release_path(path); | 4269 | btrfs_release_path(path); |
4128 | 4270 | ||
4271 | /* | ||
4272 | * We don't actually care about pending_move as we are simply | ||
4273 | * re-creating this inode and will be rename'ing it into place once we | ||
4274 | * rename the parent directory. | ||
4275 | */ | ||
4129 | ret = process_recorded_refs(sctx, &pending_move); | 4276 | ret = process_recorded_refs(sctx, &pending_move); |
4130 | /* Only applicable to an incremental send. */ | ||
4131 | ASSERT(pending_move == 0); | ||
4132 | |||
4133 | out: | 4277 | out: |
4134 | btrfs_free_path(path); | 4278 | btrfs_free_path(path); |
4135 | return ret; | 4279 | return ret; |
@@ -5602,7 +5746,10 @@ static int changed_ref(struct send_ctx *sctx, | |||
5602 | { | 5746 | { |
5603 | int ret = 0; | 5747 | int ret = 0; |
5604 | 5748 | ||
5605 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5749 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5750 | inconsistent_snapshot_error(sctx, result, "reference"); | ||
5751 | return -EIO; | ||
5752 | } | ||
5606 | 5753 | ||
5607 | if (!sctx->cur_inode_new_gen && | 5754 | if (!sctx->cur_inode_new_gen && |
5608 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { | 5755 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { |
@@ -5627,7 +5774,10 @@ static int changed_xattr(struct send_ctx *sctx, | |||
5627 | { | 5774 | { |
5628 | int ret = 0; | 5775 | int ret = 0; |
5629 | 5776 | ||
5630 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5777 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5778 | inconsistent_snapshot_error(sctx, result, "xattr"); | ||
5779 | return -EIO; | ||
5780 | } | ||
5631 | 5781 | ||
5632 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | 5782 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { |
5633 | if (result == BTRFS_COMPARE_TREE_NEW) | 5783 | if (result == BTRFS_COMPARE_TREE_NEW) |
@@ -5651,7 +5801,10 @@ static int changed_extent(struct send_ctx *sctx, | |||
5651 | { | 5801 | { |
5652 | int ret = 0; | 5802 | int ret = 0; |
5653 | 5803 | ||
5654 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5804 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5805 | inconsistent_snapshot_error(sctx, result, "extent"); | ||
5806 | return -EIO; | ||
5807 | } | ||
5655 | 5808 | ||
5656 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | 5809 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { |
5657 | if (result != BTRFS_COMPARE_TREE_DELETED) | 5810 | if (result != BTRFS_COMPARE_TREE_DELETED) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 864ce334f696..4071fe2bd098 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb) | |||
2241 | struct btrfs_trans_handle *trans; | 2241 | struct btrfs_trans_handle *trans; |
2242 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; | 2242 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
2243 | 2243 | ||
2244 | root->fs_info->fs_frozen = 1; | ||
2245 | /* | ||
2246 | * We don't need a barrier here, we'll wait for any transaction that | ||
2247 | * could be in progress on other threads (and do delayed iputs that | ||
2248 | * we want to avoid on a frozen filesystem), or do the commit | ||
2249 | * ourselves. | ||
2250 | */ | ||
2244 | trans = btrfs_attach_transaction_barrier(root); | 2251 | trans = btrfs_attach_transaction_barrier(root); |
2245 | if (IS_ERR(trans)) { | 2252 | if (IS_ERR(trans)) { |
2246 | /* no transaction, don't bother */ | 2253 | /* no transaction, don't bother */ |
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb) | |||
2251 | return btrfs_commit_transaction(trans, root); | 2258 | return btrfs_commit_transaction(trans, root); |
2252 | } | 2259 | } |
2253 | 2260 | ||
2261 | static int btrfs_unfreeze(struct super_block *sb) | ||
2262 | { | ||
2263 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; | ||
2264 | |||
2265 | root->fs_info->fs_frozen = 0; | ||
2266 | return 0; | ||
2267 | } | ||
2268 | |||
2254 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | 2269 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) |
2255 | { | 2270 | { |
2256 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); | 2271 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); |
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = { | |||
2299 | .statfs = btrfs_statfs, | 2314 | .statfs = btrfs_statfs, |
2300 | .remount_fs = btrfs_remount, | 2315 | .remount_fs = btrfs_remount, |
2301 | .freeze_fs = btrfs_freeze, | 2316 | .freeze_fs = btrfs_freeze, |
2317 | .unfreeze_fs = btrfs_unfreeze, | ||
2302 | }; | 2318 | }; |
2303 | 2319 | ||
2304 | static const struct file_operations btrfs_ctl_fops = { | 2320 | static const struct file_operations btrfs_ctl_fops = { |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9cca0a721961..95d41919d034 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2278 | 2278 | ||
2279 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 2279 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
2280 | 2280 | ||
2281 | /* | ||
2282 | * If the fs has been frozen, we cannot handle delayed iputs; otherwise | ||
2283 | * it would result in a deadlock involving SB_FREEZE_FS. | ||
2284 | */ | ||
2281 | if (current != root->fs_info->transaction_kthread && | 2285 | if (current != root->fs_info->transaction_kthread && |
2282 | current != root->fs_info->cleaner_kthread) | 2286 | current != root->fs_info->cleaner_kthread && |
2287 | !root->fs_info->fs_frozen) | ||
2283 | btrfs_run_delayed_iputs(root); | 2288 | btrfs_run_delayed_iputs(root); |
2284 | 2289 | ||
2285 | return ret; | 2290 | return ret; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d31a0c4f56be..ef9c55bc7907 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "backref.h" | 27 | #include "backref.h" |
28 | #include "hash.h" | 28 | #include "hash.h" |
29 | #include "compression.h" | 29 | #include "compression.h" |
30 | #include "qgroup.h" | ||
30 | 31 | ||
31 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
32 | * | 33 | * |
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
680 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 681 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
681 | offset = key->offset - btrfs_file_extent_offset(eb, item); | 682 | offset = key->offset - btrfs_file_extent_offset(eb, item); |
682 | 683 | ||
684 | /* | ||
685 | * Manually record the dirty extent: here we do a shallow | ||
686 | * file extent item copy and skip the normal backref update, | ||
687 | * modifying the extent tree all by ourselves. | ||
688 | * So we need to manually record the dirty extent for qgroup, | ||
689 | * as the owner of the file extent changed from the log tree | ||
690 | * (doesn't affect qgroup) to the fs/file tree (affects qgroup). | ||
691 | */ | ||
692 | ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, | ||
693 | btrfs_file_extent_disk_bytenr(eb, item), | ||
694 | btrfs_file_extent_disk_num_bytes(eb, item), | ||
695 | GFP_NOFS); | ||
696 | if (ret < 0) | ||
697 | goto out; | ||
698 | |||
683 | if (ins.objectid > 0) { | 699 | if (ins.objectid > 0) { |
684 | u64 csum_start; | 700 | u64 csum_start; |
685 | u64 csum_end; | 701 | u64 csum_end; |
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2807 | */ | 2823 | */ |
2808 | mutex_unlock(&root->log_mutex); | 2824 | mutex_unlock(&root->log_mutex); |
2809 | 2825 | ||
2810 | btrfs_init_log_ctx(&root_log_ctx); | 2826 | btrfs_init_log_ctx(&root_log_ctx, NULL); |
2811 | 2827 | ||
2812 | mutex_lock(&log_root_tree->log_mutex); | 2828 | mutex_lock(&log_root_tree->log_mutex); |
2813 | atomic_inc(&log_root_tree->log_batch); | 2829 | atomic_inc(&log_root_tree->log_batch); |
@@ -2851,6 +2867,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2851 | 2867 | ||
2852 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { | 2868 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
2853 | blk_finish_plug(&plug); | 2869 | blk_finish_plug(&plug); |
2870 | list_del_init(&root_log_ctx.list); | ||
2854 | mutex_unlock(&log_root_tree->log_mutex); | 2871 | mutex_unlock(&log_root_tree->log_mutex); |
2855 | ret = root_log_ctx.log_ret; | 2872 | ret = root_log_ctx.log_ret; |
2856 | goto out; | 2873 | goto out; |
@@ -4469,7 +4486,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | |||
4469 | static int btrfs_check_ref_name_override(struct extent_buffer *eb, | 4486 | static int btrfs_check_ref_name_override(struct extent_buffer *eb, |
4470 | const int slot, | 4487 | const int slot, |
4471 | const struct btrfs_key *key, | 4488 | const struct btrfs_key *key, |
4472 | struct inode *inode) | 4489 | struct inode *inode, |
4490 | u64 *other_ino) | ||
4473 | { | 4491 | { |
4474 | int ret; | 4492 | int ret; |
4475 | struct btrfs_path *search_path; | 4493 | struct btrfs_path *search_path; |
@@ -4528,7 +4546,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, | |||
4528 | search_path, parent, | 4546 | search_path, parent, |
4529 | name, this_name_len, 0); | 4547 | name, this_name_len, 0); |
4530 | if (di && !IS_ERR(di)) { | 4548 | if (di && !IS_ERR(di)) { |
4531 | ret = 1; | 4549 | struct btrfs_key di_key; |
4550 | |||
4551 | btrfs_dir_item_key_to_cpu(search_path->nodes[0], | ||
4552 | di, &di_key); | ||
4553 | if (di_key.type == BTRFS_INODE_ITEM_KEY) { | ||
4554 | ret = 1; | ||
4555 | *other_ino = di_key.objectid; | ||
4556 | } else { | ||
4557 | ret = -EAGAIN; | ||
4558 | } | ||
4532 | goto out; | 4559 | goto out; |
4533 | } else if (IS_ERR(di)) { | 4560 | } else if (IS_ERR(di)) { |
4534 | ret = PTR_ERR(di); | 4561 | ret = PTR_ERR(di); |
@@ -4722,16 +4749,72 @@ again: | |||
4722 | if ((min_key.type == BTRFS_INODE_REF_KEY || | 4749 | if ((min_key.type == BTRFS_INODE_REF_KEY || |
4723 | min_key.type == BTRFS_INODE_EXTREF_KEY) && | 4750 | min_key.type == BTRFS_INODE_EXTREF_KEY) && |
4724 | BTRFS_I(inode)->generation == trans->transid) { | 4751 | BTRFS_I(inode)->generation == trans->transid) { |
4752 | u64 other_ino = 0; | ||
4753 | |||
4725 | ret = btrfs_check_ref_name_override(path->nodes[0], | 4754 | ret = btrfs_check_ref_name_override(path->nodes[0], |
4726 | path->slots[0], | 4755 | path->slots[0], |
4727 | &min_key, inode); | 4756 | &min_key, inode, |
4757 | &other_ino); | ||
4728 | if (ret < 0) { | 4758 | if (ret < 0) { |
4729 | err = ret; | 4759 | err = ret; |
4730 | goto out_unlock; | 4760 | goto out_unlock; |
4731 | } else if (ret > 0) { | 4761 | } else if (ret > 0 && ctx && |
4732 | err = 1; | 4762 | other_ino != btrfs_ino(ctx->inode)) { |
4733 | btrfs_set_log_full_commit(root->fs_info, trans); | 4763 | struct btrfs_key inode_key; |
4734 | goto out_unlock; | 4764 | struct inode *other_inode; |
4765 | |||
4766 | if (ins_nr > 0) { | ||
4767 | ins_nr++; | ||
4768 | } else { | ||
4769 | ins_nr = 1; | ||
4770 | ins_start_slot = path->slots[0]; | ||
4771 | } | ||
4772 | ret = copy_items(trans, inode, dst_path, path, | ||
4773 | &last_extent, ins_start_slot, | ||
4774 | ins_nr, inode_only, | ||
4775 | logged_isize); | ||
4776 | if (ret < 0) { | ||
4777 | err = ret; | ||
4778 | goto out_unlock; | ||
4779 | } | ||
4780 | ins_nr = 0; | ||
4781 | btrfs_release_path(path); | ||
4782 | inode_key.objectid = other_ino; | ||
4783 | inode_key.type = BTRFS_INODE_ITEM_KEY; | ||
4784 | inode_key.offset = 0; | ||
4785 | other_inode = btrfs_iget(root->fs_info->sb, | ||
4786 | &inode_key, root, | ||
4787 | NULL); | ||
4788 | /* | ||
4789 | * If the other inode that had a conflicting dir | ||
4790 | * entry was deleted in the current transaction, | ||
4791 | * we don't need to do more work nor fallback to | ||
4792 | * a transaction commit. | ||
4793 | */ | ||
4794 | if (IS_ERR(other_inode) && | ||
4795 | PTR_ERR(other_inode) == -ENOENT) { | ||
4796 | goto next_key; | ||
4797 | } else if (IS_ERR(other_inode)) { | ||
4798 | err = PTR_ERR(other_inode); | ||
4799 | goto out_unlock; | ||
4800 | } | ||
4801 | /* | ||
4802 | * We are safe logging the other inode without | ||
4803 | * acquiring its i_mutex as long as we log with | ||
4804 | * the LOG_INODE_EXISTS mode. We're safe against | ||
4805 | * concurrent renames of the other inode as well | ||
4806 | * because during a rename we pin the log and | ||
4807 | * update the log with the new name before we | ||
4808 | * unpin it. | ||
4809 | */ | ||
4810 | err = btrfs_log_inode(trans, root, other_inode, | ||
4811 | LOG_INODE_EXISTS, | ||
4812 | 0, LLONG_MAX, ctx); | ||
4813 | iput(other_inode); | ||
4814 | if (err) | ||
4815 | goto out_unlock; | ||
4816 | else | ||
4817 | goto next_key; | ||
4735 | } | 4818 | } |
4736 | } | 4819 | } |
4737 | 4820 | ||
@@ -4799,7 +4882,7 @@ next_slot: | |||
4799 | ins_nr = 0; | 4882 | ins_nr = 0; |
4800 | } | 4883 | } |
4801 | btrfs_release_path(path); | 4884 | btrfs_release_path(path); |
4802 | 4885 | next_key: | |
4803 | if (min_key.offset < (u64)-1) { | 4886 | if (min_key.offset < (u64)-1) { |
4804 | min_key.offset++; | 4887 | min_key.offset++; |
4805 | } else if (min_key.type < max_key.type) { | 4888 | } else if (min_key.type < max_key.type) { |
@@ -4993,8 +5076,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4993 | if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) | 5076 | if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) |
4994 | break; | 5077 | break; |
4995 | 5078 | ||
4996 | if (IS_ROOT(parent)) | 5079 | if (IS_ROOT(parent)) { |
5080 | inode = d_inode(parent); | ||
5081 | if (btrfs_must_commit_transaction(trans, inode)) | ||
5082 | ret = 1; | ||
4997 | break; | 5083 | break; |
5084 | } | ||
4998 | 5085 | ||
4999 | parent = dget_parent(parent); | 5086 | parent = dget_parent(parent); |
5000 | dput(old_parent); | 5087 | dput(old_parent); |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index a9f1b75d080d..ab858e31ccbc 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -30,15 +30,18 @@ struct btrfs_log_ctx { | |||
30 | int log_transid; | 30 | int log_transid; |
31 | int io_err; | 31 | int io_err; |
32 | bool log_new_dentries; | 32 | bool log_new_dentries; |
33 | struct inode *inode; | ||
33 | struct list_head list; | 34 | struct list_head list; |
34 | }; | 35 | }; |
35 | 36 | ||
36 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | 37 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, |
38 | struct inode *inode) | ||
37 | { | 39 | { |
38 | ctx->log_ret = 0; | 40 | ctx->log_ret = 0; |
39 | ctx->log_transid = 0; | 41 | ctx->log_transid = 0; |
40 | ctx->io_err = 0; | 42 | ctx->io_err = 0; |
41 | ctx->log_new_dentries = false; | 43 | ctx->log_new_dentries = false; |
44 | ctx->inode = inode; | ||
42 | INIT_LIST_HEAD(&ctx->list); | 45 | INIT_LIST_HEAD(&ctx->list); |
43 | } | 46 | } |
44 | 47 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 51f125508771..035efce603a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work) | |||
834 | struct btrfs_device *device; | 834 | struct btrfs_device *device; |
835 | 835 | ||
836 | device = container_of(work, struct btrfs_device, rcu_work); | 836 | device = container_of(work, struct btrfs_device, rcu_work); |
837 | |||
838 | if (device->bdev) | ||
839 | blkdev_put(device->bdev, device->mode); | ||
840 | |||
841 | rcu_string_free(device->name); | 837 | rcu_string_free(device->name); |
842 | kfree(device); | 838 | kfree(device); |
843 | } | 839 | } |
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head) | |||
852 | schedule_work(&device->rcu_work); | 848 | schedule_work(&device->rcu_work); |
853 | } | 849 | } |
854 | 850 | ||
851 | static void btrfs_close_bdev(struct btrfs_device *device) | ||
852 | { | ||
853 | if (device->bdev && device->writeable) { | ||
854 | sync_blockdev(device->bdev); | ||
855 | invalidate_bdev(device->bdev); | ||
856 | } | ||
857 | |||
858 | if (device->bdev) | ||
859 | blkdev_put(device->bdev, device->mode); | ||
860 | } | ||
861 | |||
855 | static void btrfs_close_one_device(struct btrfs_device *device) | 862 | static void btrfs_close_one_device(struct btrfs_device *device) |
856 | { | 863 | { |
857 | struct btrfs_fs_devices *fs_devices = device->fs_devices; | 864 | struct btrfs_fs_devices *fs_devices = device->fs_devices; |
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) | |||
870 | if (device->missing) | 877 | if (device->missing) |
871 | fs_devices->missing_devices--; | 878 | fs_devices->missing_devices--; |
872 | 879 | ||
873 | if (device->bdev && device->writeable) { | 880 | btrfs_close_bdev(device); |
874 | sync_blockdev(device->bdev); | ||
875 | invalidate_bdev(device->bdev); | ||
876 | } | ||
877 | 881 | ||
878 | new_device = btrfs_alloc_device(NULL, &device->devid, | 882 | new_device = btrfs_alloc_device(NULL, &device->devid, |
879 | device->uuid); | 883 | device->uuid); |
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) | |||
1932 | btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); | 1936 | btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); |
1933 | } | 1937 | } |
1934 | 1938 | ||
1939 | btrfs_close_bdev(device); | ||
1940 | |||
1935 | call_rcu(&device->rcu, free_device); | 1941 | call_rcu(&device->rcu, free_device); |
1936 | 1942 | ||
1937 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; | 1943 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
2025 | /* zero out the old super if it is writable */ | 2031 | /* zero out the old super if it is writable */ |
2026 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | 2032 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); |
2027 | } | 2033 | } |
2034 | |||
2035 | btrfs_close_bdev(srcdev); | ||
2036 | |||
2028 | call_rcu(&srcdev->rcu, free_device); | 2037 | call_rcu(&srcdev->rcu, free_device); |
2029 | 2038 | ||
2030 | /* | 2039 | /* |
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
2080 | * the device_list_mutex lock. | 2089 | * the device_list_mutex lock. |
2081 | */ | 2090 | */ |
2082 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | 2091 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); |
2092 | |||
2093 | btrfs_close_bdev(tgtdev); | ||
2083 | call_rcu(&tgtdev->rcu, free_device); | 2094 | call_rcu(&tgtdev->rcu, free_device); |
2084 | } | 2095 | } |
2085 | 2096 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 99115cae1652..16e6ded0b7f2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1347,9 +1347,12 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, | |||
1347 | { | 1347 | { |
1348 | struct inode *inode = &ci->vfs_inode; | 1348 | struct inode *inode = &ci->vfs_inode; |
1349 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 1349 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1350 | struct ceph_mds_session *session = *psession; | 1350 | struct ceph_mds_session *session = NULL; |
1351 | int mds; | 1351 | int mds; |
1352 | |||
1352 | dout("ceph_flush_snaps %p\n", inode); | 1353 | dout("ceph_flush_snaps %p\n", inode); |
1354 | if (psession) | ||
1355 | session = *psession; | ||
1353 | retry: | 1356 | retry: |
1354 | spin_lock(&ci->i_ceph_lock); | 1357 | spin_lock(&ci->i_ceph_lock); |
1355 | if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { | 1358 | if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c64a0b794d49..df4b3e6fa563 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -597,7 +597,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) | |||
597 | if (is_hash_order(new_pos)) { | 597 | if (is_hash_order(new_pos)) { |
598 | /* no need to reset last_name for a forward seek when | 598 | /* no need to reset last_name for a forward seek when |
599 | * dentries are sotred in hash order */ | 599 | * dentries are sotred in hash order */ |
600 | } else if (fi->frag |= fpos_frag(new_pos)) { | 600 | } else if (fi->frag != fpos_frag(new_pos)) { |
601 | return true; | 601 | return true; |
602 | } | 602 | } |
603 | rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; | 603 | rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fa59a85226b2..f72d4ae303b2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -2759,6 +2759,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2759 | } else { | 2759 | } else { |
2760 | path = NULL; | 2760 | path = NULL; |
2761 | pathlen = 0; | 2761 | pathlen = 0; |
2762 | pathbase = 0; | ||
2762 | } | 2763 | } |
2763 | 2764 | ||
2764 | spin_lock(&ci->i_ceph_lock); | 2765 | spin_lock(&ci->i_ceph_lock); |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6bbec5e784cd..14ae4b8e1a3c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
609 | char *s, *p; | 609 | char *s, *p; |
610 | char sep; | 610 | char sep; |
611 | 611 | ||
612 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) | ||
613 | return dget(sb->s_root); | ||
614 | |||
612 | full_path = cifs_build_path_to_root(vol, cifs_sb, | 615 | full_path = cifs_build_path_to_root(vol, cifs_sb, |
613 | cifs_sb_master_tcon(cifs_sb)); | 616 | cifs_sb_master_tcon(cifs_sb)); |
614 | if (full_path == NULL) | 617 | if (full_path == NULL) |
@@ -686,26 +689,22 @@ cifs_do_mount(struct file_system_type *fs_type, | |||
686 | cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); | 689 | cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); |
687 | if (cifs_sb->mountdata == NULL) { | 690 | if (cifs_sb->mountdata == NULL) { |
688 | root = ERR_PTR(-ENOMEM); | 691 | root = ERR_PTR(-ENOMEM); |
689 | goto out_cifs_sb; | 692 | goto out_free; |
690 | } | 693 | } |
691 | 694 | ||
692 | if (volume_info->prepath) { | 695 | rc = cifs_setup_cifs_sb(volume_info, cifs_sb); |
693 | cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL); | 696 | if (rc) { |
694 | if (cifs_sb->prepath == NULL) { | 697 | root = ERR_PTR(rc); |
695 | root = ERR_PTR(-ENOMEM); | 698 | goto out_free; |
696 | goto out_cifs_sb; | ||
697 | } | ||
698 | } | 699 | } |
699 | 700 | ||
700 | cifs_setup_cifs_sb(volume_info, cifs_sb); | ||
701 | |||
702 | rc = cifs_mount(cifs_sb, volume_info); | 701 | rc = cifs_mount(cifs_sb, volume_info); |
703 | if (rc) { | 702 | if (rc) { |
704 | if (!(flags & MS_SILENT)) | 703 | if (!(flags & MS_SILENT)) |
705 | cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", | 704 | cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", |
706 | rc); | 705 | rc); |
707 | root = ERR_PTR(rc); | 706 | root = ERR_PTR(rc); |
708 | goto out_mountdata; | 707 | goto out_free; |
709 | } | 708 | } |
710 | 709 | ||
711 | mnt_data.vol = volume_info; | 710 | mnt_data.vol = volume_info; |
@@ -735,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type, | |||
735 | sb->s_flags |= MS_ACTIVE; | 734 | sb->s_flags |= MS_ACTIVE; |
736 | } | 735 | } |
737 | 736 | ||
738 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) | 737 | root = cifs_get_root(volume_info, sb); |
739 | root = dget(sb->s_root); | ||
740 | else | ||
741 | root = cifs_get_root(volume_info, sb); | ||
742 | |||
743 | if (IS_ERR(root)) | 738 | if (IS_ERR(root)) |
744 | goto out_super; | 739 | goto out_super; |
745 | 740 | ||
@@ -752,9 +747,9 @@ out: | |||
752 | cifs_cleanup_volume_info(volume_info); | 747 | cifs_cleanup_volume_info(volume_info); |
753 | return root; | 748 | return root; |
754 | 749 | ||
755 | out_mountdata: | 750 | out_free: |
751 | kfree(cifs_sb->prepath); | ||
756 | kfree(cifs_sb->mountdata); | 752 | kfree(cifs_sb->mountdata); |
757 | out_cifs_sb: | ||
758 | kfree(cifs_sb); | 753 | kfree(cifs_sb); |
759 | out_nls: | 754 | out_nls: |
760 | unload_nls(volume_info->local_nls); | 755 | unload_nls(volume_info->local_nls); |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1243bd326591..95dab43646f0 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, | |||
184 | unsigned int to_read); | 184 | unsigned int to_read); |
185 | extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, | 185 | extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, |
186 | struct page *page, unsigned int to_read); | 186 | struct page *page, unsigned int to_read); |
187 | extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | 187 | extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, |
188 | struct cifs_sb_info *cifs_sb); | 188 | struct cifs_sb_info *cifs_sb); |
189 | extern int cifs_match_super(struct super_block *, void *); | 189 | extern int cifs_match_super(struct super_block *, void *); |
190 | extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); | 190 | extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7ae03283bd61..2e4f4bad8b1e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) | |||
2781 | return 1; | 2781 | return 1; |
2782 | } | 2782 | } |
2783 | 2783 | ||
2784 | static int | ||
2785 | match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) | ||
2786 | { | ||
2787 | struct cifs_sb_info *old = CIFS_SB(sb); | ||
2788 | struct cifs_sb_info *new = mnt_data->cifs_sb; | ||
2789 | |||
2790 | if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { | ||
2791 | if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) | ||
2792 | return 0; | ||
2793 | /* The prepath should be null terminated strings */ | ||
2794 | if (strcmp(new->prepath, old->prepath)) | ||
2795 | return 0; | ||
2796 | |||
2797 | return 1; | ||
2798 | } | ||
2799 | return 0; | ||
2800 | } | ||
2801 | |||
2784 | int | 2802 | int |
2785 | cifs_match_super(struct super_block *sb, void *data) | 2803 | cifs_match_super(struct super_block *sb, void *data) |
2786 | { | 2804 | { |
@@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data) | |||
2808 | 2826 | ||
2809 | if (!match_server(tcp_srv, volume_info) || | 2827 | if (!match_server(tcp_srv, volume_info) || |
2810 | !match_session(ses, volume_info) || | 2828 | !match_session(ses, volume_info) || |
2811 | !match_tcon(tcon, volume_info->UNC)) { | 2829 | !match_tcon(tcon, volume_info->UNC) || |
2830 | !match_prepath(sb, mnt_data)) { | ||
2812 | rc = 0; | 2831 | rc = 0; |
2813 | goto out; | 2832 | goto out; |
2814 | } | 2833 | } |
@@ -3222,7 +3241,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, | |||
3222 | } | 3241 | } |
3223 | } | 3242 | } |
3224 | 3243 | ||
3225 | void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | 3244 | int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, |
3226 | struct cifs_sb_info *cifs_sb) | 3245 | struct cifs_sb_info *cifs_sb) |
3227 | { | 3246 | { |
3228 | INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); | 3247 | INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); |
@@ -3316,6 +3335,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
3316 | 3335 | ||
3317 | if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) | 3336 | if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) |
3318 | cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); | 3337 | cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); |
3338 | |||
3339 | if (pvolume_info->prepath) { | ||
3340 | cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL); | ||
3341 | if (cifs_sb->prepath == NULL) | ||
3342 | return -ENOMEM; | ||
3343 | } | ||
3344 | |||
3345 | return 0; | ||
3319 | } | 3346 | } |
3320 | 3347 | ||
3321 | static void | 3348 | static void |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index c30cf49b69d2..2c6312db8516 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -333,6 +333,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf, | |||
333 | if (bin_attr->cb_max_size && | 333 | if (bin_attr->cb_max_size && |
334 | *ppos + count > bin_attr->cb_max_size) { | 334 | *ppos + count > bin_attr->cb_max_size) { |
335 | len = -EFBIG; | 335 | len = -EFBIG; |
336 | goto out; | ||
336 | } | 337 | } |
337 | 338 | ||
338 | tbuf = vmalloc(*ppos + count); | 339 | tbuf = vmalloc(*ppos + count); |
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 0f9961eede1e..ed115acb5dee 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/fscrypto.h> | 13 | #include <linux/fscrypto.h> |
14 | #include <linux/mount.h> | ||
14 | 15 | ||
15 | static int inode_has_encryption_context(struct inode *inode) | 16 | static int inode_has_encryption_context(struct inode *inode) |
16 | { | 17 | { |
@@ -92,26 +93,42 @@ static int create_encryption_context_from_policy(struct inode *inode, | |||
92 | return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL); | 93 | return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL); |
93 | } | 94 | } |
94 | 95 | ||
95 | int fscrypt_process_policy(struct inode *inode, | 96 | int fscrypt_process_policy(struct file *filp, |
96 | const struct fscrypt_policy *policy) | 97 | const struct fscrypt_policy *policy) |
97 | { | 98 | { |
99 | struct inode *inode = file_inode(filp); | ||
100 | int ret; | ||
101 | |||
102 | if (!inode_owner_or_capable(inode)) | ||
103 | return -EACCES; | ||
104 | |||
98 | if (policy->version != 0) | 105 | if (policy->version != 0) |
99 | return -EINVAL; | 106 | return -EINVAL; |
100 | 107 | ||
108 | ret = mnt_want_write_file(filp); | ||
109 | if (ret) | ||
110 | return ret; | ||
111 | |||
101 | if (!inode_has_encryption_context(inode)) { | 112 | if (!inode_has_encryption_context(inode)) { |
102 | if (!inode->i_sb->s_cop->empty_dir) | 113 | if (!S_ISDIR(inode->i_mode)) |
103 | return -EOPNOTSUPP; | 114 | ret = -EINVAL; |
104 | if (!inode->i_sb->s_cop->empty_dir(inode)) | 115 | else if (!inode->i_sb->s_cop->empty_dir) |
105 | return -ENOTEMPTY; | 116 | ret = -EOPNOTSUPP; |
106 | return create_encryption_context_from_policy(inode, policy); | 117 | else if (!inode->i_sb->s_cop->empty_dir(inode)) |
118 | ret = -ENOTEMPTY; | ||
119 | else | ||
120 | ret = create_encryption_context_from_policy(inode, | ||
121 | policy); | ||
122 | } else if (!is_encryption_context_consistent_with_policy(inode, | ||
123 | policy)) { | ||
124 | printk(KERN_WARNING | ||
125 | "%s: Policy inconsistent with encryption context\n", | ||
126 | __func__); | ||
127 | ret = -EINVAL; | ||
107 | } | 128 | } |
108 | 129 | ||
109 | if (is_encryption_context_consistent_with_policy(inode, policy)) | 130 | mnt_drop_write_file(filp); |
110 | return 0; | 131 | return ret; |
111 | |||
112 | printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", | ||
113 | __func__); | ||
114 | return -EINVAL; | ||
115 | } | 132 | } |
116 | EXPORT_SYMBOL(fscrypt_process_policy); | 133 | EXPORT_SYMBOL(fscrypt_process_policy); |
117 | 134 | ||
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index d116453b0276..79a5941c2474 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -585,7 +585,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) | |||
585 | */ | 585 | */ |
586 | void *devpts_get_priv(struct dentry *dentry) | 586 | void *devpts_get_priv(struct dentry *dentry) |
587 | { | 587 | { |
588 | WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC); | 588 | if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC) |
589 | return NULL; | ||
589 | return dentry->d_fsdata; | 590 | return dentry->d_fsdata; |
590 | } | 591 | } |
591 | 592 | ||
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index eea64912c9c0..466f7d60edc2 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c | |||
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops; | |||
607 | static const struct file_operations format3_fops; | 607 | static const struct file_operations format3_fops; |
608 | static const struct file_operations format4_fops; | 608 | static const struct file_operations format4_fops; |
609 | 609 | ||
610 | static int table_open(struct inode *inode, struct file *file) | 610 | static int table_open1(struct inode *inode, struct file *file) |
611 | { | 611 | { |
612 | struct seq_file *seq; | 612 | struct seq_file *seq; |
613 | int ret = -1; | 613 | int ret; |
614 | 614 | ||
615 | if (file->f_op == &format1_fops) | 615 | ret = seq_open(file, &format1_seq_ops); |
616 | ret = seq_open(file, &format1_seq_ops); | 616 | if (ret) |
617 | else if (file->f_op == &format2_fops) | 617 | return ret; |
618 | ret = seq_open(file, &format2_seq_ops); | 618 | |
619 | else if (file->f_op == &format3_fops) | 619 | seq = file->private_data; |
620 | ret = seq_open(file, &format3_seq_ops); | 620 | seq->private = inode->i_private; /* the dlm_ls */ |
621 | else if (file->f_op == &format4_fops) | 621 | return 0; |
622 | ret = seq_open(file, &format4_seq_ops); | 622 | } |
623 | |||
624 | static int table_open2(struct inode *inode, struct file *file) | ||
625 | { | ||
626 | struct seq_file *seq; | ||
627 | int ret; | ||
628 | |||
629 | ret = seq_open(file, &format2_seq_ops); | ||
630 | if (ret) | ||
631 | return ret; | ||
632 | |||
633 | seq = file->private_data; | ||
634 | seq->private = inode->i_private; /* the dlm_ls */ | ||
635 | return 0; | ||
636 | } | ||
637 | |||
638 | static int table_open3(struct inode *inode, struct file *file) | ||
639 | { | ||
640 | struct seq_file *seq; | ||
641 | int ret; | ||
642 | |||
643 | ret = seq_open(file, &format3_seq_ops); | ||
644 | if (ret) | ||
645 | return ret; | ||
646 | |||
647 | seq = file->private_data; | ||
648 | seq->private = inode->i_private; /* the dlm_ls */ | ||
649 | return 0; | ||
650 | } | ||
651 | |||
652 | static int table_open4(struct inode *inode, struct file *file) | ||
653 | { | ||
654 | struct seq_file *seq; | ||
655 | int ret; | ||
623 | 656 | ||
657 | ret = seq_open(file, &format4_seq_ops); | ||
624 | if (ret) | 658 | if (ret) |
625 | return ret; | 659 | return ret; |
626 | 660 | ||
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file) | |||
631 | 665 | ||
632 | static const struct file_operations format1_fops = { | 666 | static const struct file_operations format1_fops = { |
633 | .owner = THIS_MODULE, | 667 | .owner = THIS_MODULE, |
634 | .open = table_open, | 668 | .open = table_open1, |
635 | .read = seq_read, | 669 | .read = seq_read, |
636 | .llseek = seq_lseek, | 670 | .llseek = seq_lseek, |
637 | .release = seq_release | 671 | .release = seq_release |
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = { | |||
639 | 673 | ||
640 | static const struct file_operations format2_fops = { | 674 | static const struct file_operations format2_fops = { |
641 | .owner = THIS_MODULE, | 675 | .owner = THIS_MODULE, |
642 | .open = table_open, | 676 | .open = table_open2, |
643 | .read = seq_read, | 677 | .read = seq_read, |
644 | .llseek = seq_lseek, | 678 | .llseek = seq_lseek, |
645 | .release = seq_release | 679 | .release = seq_release |
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = { | |||
647 | 681 | ||
648 | static const struct file_operations format3_fops = { | 682 | static const struct file_operations format3_fops = { |
649 | .owner = THIS_MODULE, | 683 | .owner = THIS_MODULE, |
650 | .open = table_open, | 684 | .open = table_open3, |
651 | .read = seq_read, | 685 | .read = seq_read, |
652 | .llseek = seq_lseek, | 686 | .llseek = seq_lseek, |
653 | .release = seq_release | 687 | .release = seq_release |
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = { | |||
655 | 689 | ||
656 | static const struct file_operations format4_fops = { | 690 | static const struct file_operations format4_fops = { |
657 | .owner = THIS_MODULE, | 691 | .owner = THIS_MODULE, |
658 | .open = table_open, | 692 | .open = table_open4, |
659 | .read = seq_read, | 693 | .read = seq_read, |
660 | .llseek = seq_lseek, | 694 | .llseek = seq_lseek, |
661 | .release = seq_release | 695 | .release = seq_release |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3131747199e1..c6ea25a190f8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5466 | sbi->s_want_extra_isize, | 5466 | sbi->s_want_extra_isize, |
5467 | iloc, handle); | 5467 | iloc, handle); |
5468 | if (ret) { | 5468 | if (ret) { |
5469 | ext4_set_inode_state(inode, | ||
5470 | EXT4_STATE_NO_EXPAND); | ||
5471 | if (mnt_count != | 5469 | if (mnt_count != |
5472 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5470 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
5473 | ext4_warning(inode->i_sb, | 5471 | ext4_warning(inode->i_sb, |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 10686fd67fb4..1bb7df5e4536 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -776,7 +776,7 @@ resizefs_out: | |||
776 | (struct fscrypt_policy __user *)arg, | 776 | (struct fscrypt_policy __user *)arg, |
777 | sizeof(policy))) | 777 | sizeof(policy))) |
778 | return -EFAULT; | 778 | return -EFAULT; |
779 | return fscrypt_process_policy(inode, &policy); | 779 | return fscrypt_process_policy(filp, &policy); |
780 | #else | 780 | #else |
781 | return -EOPNOTSUPP; | 781 | return -EOPNOTSUPP; |
782 | #endif | 782 | #endif |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1c593aa0218e..3ec8708989ca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2211,6 +2211,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, | |||
2211 | 2211 | ||
2212 | /* Called at mount-time, super-block is locked */ | 2212 | /* Called at mount-time, super-block is locked */ |
2213 | static int ext4_check_descriptors(struct super_block *sb, | 2213 | static int ext4_check_descriptors(struct super_block *sb, |
2214 | ext4_fsblk_t sb_block, | ||
2214 | ext4_group_t *first_not_zeroed) | 2215 | ext4_group_t *first_not_zeroed) |
2215 | { | 2216 | { |
2216 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2217 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2241,6 +2242,11 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
2241 | grp = i; | 2242 | grp = i; |
2242 | 2243 | ||
2243 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2244 | block_bitmap = ext4_block_bitmap(sb, gdp); |
2245 | if (block_bitmap == sb_block) { | ||
2246 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | ||
2247 | "Block bitmap for group %u overlaps " | ||
2248 | "superblock", i); | ||
2249 | } | ||
2244 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2250 | if (block_bitmap < first_block || block_bitmap > last_block) { |
2245 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2251 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2246 | "Block bitmap for group %u not in group " | 2252 | "Block bitmap for group %u not in group " |
@@ -2248,6 +2254,11 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
2248 | return 0; | 2254 | return 0; |
2249 | } | 2255 | } |
2250 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 2256 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
2257 | if (inode_bitmap == sb_block) { | ||
2258 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | ||
2259 | "Inode bitmap for group %u overlaps " | ||
2260 | "superblock", i); | ||
2261 | } | ||
2251 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 2262 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
2252 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2263 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2253 | "Inode bitmap for group %u not in group " | 2264 | "Inode bitmap for group %u not in group " |
@@ -2255,6 +2266,11 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
2255 | return 0; | 2266 | return 0; |
2256 | } | 2267 | } |
2257 | inode_table = ext4_inode_table(sb, gdp); | 2268 | inode_table = ext4_inode_table(sb, gdp); |
2269 | if (inode_table == sb_block) { | ||
2270 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | ||
2271 | "Inode table for group %u overlaps " | ||
2272 | "superblock", i); | ||
2273 | } | ||
2258 | if (inode_table < first_block || | 2274 | if (inode_table < first_block || |
2259 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 2275 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
2260 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2276 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
@@ -3757,7 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3757 | goto failed_mount2; | 3773 | goto failed_mount2; |
3758 | } | 3774 | } |
3759 | } | 3775 | } |
3760 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { | 3776 | if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { |
3761 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3777 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
3762 | ret = -EFSCORRUPTED; | 3778 | ret = -EFSCORRUPTED; |
3763 | goto failed_mount2; | 3779 | goto failed_mount2; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 39e9cfb1b371..2eb935ca5d9e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -1353,15 +1353,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
1353 | size_t min_offs, free; | 1353 | size_t min_offs, free; |
1354 | int total_ino; | 1354 | int total_ino; |
1355 | void *base, *start, *end; | 1355 | void *base, *start, *end; |
1356 | int extra_isize = 0, error = 0, tried_min_extra_isize = 0; | 1356 | int error = 0, tried_min_extra_isize = 0; |
1357 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); | 1357 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); |
1358 | int isize_diff; /* How much do we need to grow i_extra_isize */ | ||
1358 | 1359 | ||
1359 | down_write(&EXT4_I(inode)->xattr_sem); | 1360 | down_write(&EXT4_I(inode)->xattr_sem); |
1361 | /* | ||
1362 | * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty | ||
1363 | */ | ||
1364 | ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); | ||
1360 | retry: | 1365 | retry: |
1361 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { | 1366 | isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; |
1362 | up_write(&EXT4_I(inode)->xattr_sem); | 1367 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
1363 | return 0; | 1368 | goto out; |
1364 | } | ||
1365 | 1369 | ||
1366 | header = IHDR(inode, raw_inode); | 1370 | header = IHDR(inode, raw_inode); |
1367 | entry = IFIRST(header); | 1371 | entry = IFIRST(header); |
@@ -1382,7 +1386,7 @@ retry: | |||
1382 | goto cleanup; | 1386 | goto cleanup; |
1383 | 1387 | ||
1384 | free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); | 1388 | free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); |
1385 | if (free >= new_extra_isize) { | 1389 | if (free >= isize_diff) { |
1386 | entry = IFIRST(header); | 1390 | entry = IFIRST(header); |
1387 | ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize | 1391 | ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize |
1388 | - new_extra_isize, (void *)raw_inode + | 1392 | - new_extra_isize, (void *)raw_inode + |
@@ -1390,8 +1394,7 @@ retry: | |||
1390 | (void *)header, total_ino, | 1394 | (void *)header, total_ino, |
1391 | inode->i_sb->s_blocksize); | 1395 | inode->i_sb->s_blocksize); |
1392 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 1396 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
1393 | error = 0; | 1397 | goto out; |
1394 | goto cleanup; | ||
1395 | } | 1398 | } |
1396 | 1399 | ||
1397 | /* | 1400 | /* |
@@ -1414,7 +1417,7 @@ retry: | |||
1414 | end = bh->b_data + bh->b_size; | 1417 | end = bh->b_data + bh->b_size; |
1415 | min_offs = end - base; | 1418 | min_offs = end - base; |
1416 | free = ext4_xattr_free_space(first, &min_offs, base, NULL); | 1419 | free = ext4_xattr_free_space(first, &min_offs, base, NULL); |
1417 | if (free < new_extra_isize) { | 1420 | if (free < isize_diff) { |
1418 | if (!tried_min_extra_isize && s_min_extra_isize) { | 1421 | if (!tried_min_extra_isize && s_min_extra_isize) { |
1419 | tried_min_extra_isize++; | 1422 | tried_min_extra_isize++; |
1420 | new_extra_isize = s_min_extra_isize; | 1423 | new_extra_isize = s_min_extra_isize; |
@@ -1428,7 +1431,7 @@ retry: | |||
1428 | free = inode->i_sb->s_blocksize; | 1431 | free = inode->i_sb->s_blocksize; |
1429 | } | 1432 | } |
1430 | 1433 | ||
1431 | while (new_extra_isize > 0) { | 1434 | while (isize_diff > 0) { |
1432 | size_t offs, size, entry_size; | 1435 | size_t offs, size, entry_size; |
1433 | struct ext4_xattr_entry *small_entry = NULL; | 1436 | struct ext4_xattr_entry *small_entry = NULL; |
1434 | struct ext4_xattr_info i = { | 1437 | struct ext4_xattr_info i = { |
@@ -1459,7 +1462,7 @@ retry: | |||
1459 | EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + | 1462 | EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + |
1460 | EXT4_XATTR_LEN(last->e_name_len); | 1463 | EXT4_XATTR_LEN(last->e_name_len); |
1461 | if (total_size <= free && total_size < min_total_size) { | 1464 | if (total_size <= free && total_size < min_total_size) { |
1462 | if (total_size < new_extra_isize) { | 1465 | if (total_size < isize_diff) { |
1463 | small_entry = last; | 1466 | small_entry = last; |
1464 | } else { | 1467 | } else { |
1465 | entry = last; | 1468 | entry = last; |
@@ -1514,22 +1517,22 @@ retry: | |||
1514 | error = ext4_xattr_ibody_set(handle, inode, &i, is); | 1517 | error = ext4_xattr_ibody_set(handle, inode, &i, is); |
1515 | if (error) | 1518 | if (error) |
1516 | goto cleanup; | 1519 | goto cleanup; |
1520 | total_ino -= entry_size; | ||
1517 | 1521 | ||
1518 | entry = IFIRST(header); | 1522 | entry = IFIRST(header); |
1519 | if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) | 1523 | if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) |
1520 | shift_bytes = new_extra_isize; | 1524 | shift_bytes = isize_diff; |
1521 | else | 1525 | else |
1522 | shift_bytes = entry_size + size; | 1526 | shift_bytes = entry_size + EXT4_XATTR_SIZE(size); |
1523 | /* Adjust the offsets and shift the remaining entries ahead */ | 1527 | /* Adjust the offsets and shift the remaining entries ahead */ |
1524 | ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - | 1528 | ext4_xattr_shift_entries(entry, -shift_bytes, |
1525 | shift_bytes, (void *)raw_inode + | 1529 | (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + |
1526 | EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, | 1530 | EXT4_I(inode)->i_extra_isize + shift_bytes, |
1527 | (void *)header, total_ino - entry_size, | 1531 | (void *)header, total_ino, inode->i_sb->s_blocksize); |
1528 | inode->i_sb->s_blocksize); | ||
1529 | 1532 | ||
1530 | extra_isize += shift_bytes; | 1533 | isize_diff -= shift_bytes; |
1531 | new_extra_isize -= shift_bytes; | 1534 | EXT4_I(inode)->i_extra_isize += shift_bytes; |
1532 | EXT4_I(inode)->i_extra_isize = extra_isize; | 1535 | header = IHDR(inode, raw_inode); |
1533 | 1536 | ||
1534 | i.name = b_entry_name; | 1537 | i.name = b_entry_name; |
1535 | i.value = buffer; | 1538 | i.value = buffer; |
@@ -1551,6 +1554,8 @@ retry: | |||
1551 | kfree(bs); | 1554 | kfree(bs); |
1552 | } | 1555 | } |
1553 | brelse(bh); | 1556 | brelse(bh); |
1557 | out: | ||
1558 | ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); | ||
1554 | up_write(&EXT4_I(inode)->xattr_sem); | 1559 | up_write(&EXT4_I(inode)->xattr_sem); |
1555 | return 0; | 1560 | return 0; |
1556 | 1561 | ||
@@ -1562,6 +1567,10 @@ cleanup: | |||
1562 | kfree(is); | 1567 | kfree(is); |
1563 | kfree(bs); | 1568 | kfree(bs); |
1564 | brelse(bh); | 1569 | brelse(bh); |
1570 | /* | ||
1571 | * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode | ||
1572 | * size expansion failed. | ||
1573 | */ | ||
1565 | up_write(&EXT4_I(inode)->xattr_sem); | 1574 | up_write(&EXT4_I(inode)->xattr_sem); |
1566 | return error; | 1575 | return error; |
1567 | } | 1576 | } |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 69dd3e6566e0..a92e783fa057 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #define EXT4_XATTR_INDEX_SYSTEM 7 | 24 | #define EXT4_XATTR_INDEX_SYSTEM 7 |
25 | #define EXT4_XATTR_INDEX_RICHACL 8 | 25 | #define EXT4_XATTR_INDEX_RICHACL 8 |
26 | #define EXT4_XATTR_INDEX_ENCRYPTION 9 | 26 | #define EXT4_XATTR_INDEX_ENCRYPTION 9 |
27 | #define EXT4_XATTR_INDEX_HURD 10 /* Reserved for Hurd */ | ||
27 | 28 | ||
28 | struct ext4_xattr_header { | 29 | struct ext4_xattr_header { |
29 | __le32 h_magic; /* magic number for identification */ | 30 | __le32 h_magic; /* magic number for identification */ |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d64d2a515cb2..ccb401eebc11 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file, | |||
1699 | trace_f2fs_write_end(inode, pos, len, copied); | 1699 | trace_f2fs_write_end(inode, pos, len, copied); |
1700 | 1700 | ||
1701 | set_page_dirty(page); | 1701 | set_page_dirty(page); |
1702 | f2fs_put_page(page, 1); | ||
1703 | 1702 | ||
1704 | if (pos + copied > i_size_read(inode)) | 1703 | if (pos + copied > i_size_read(inode)) |
1705 | f2fs_i_size_write(inode, pos + copied); | 1704 | f2fs_i_size_write(inode, pos + copied); |
1706 | 1705 | ||
1706 | f2fs_put_page(page, 1); | ||
1707 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | 1707 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); |
1708 | return copied; | 1708 | return copied; |
1709 | } | 1709 | } |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 675fa79d86f6..14f5fe2b841e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -538,7 +538,7 @@ struct f2fs_nm_info { | |||
538 | /* NAT cache management */ | 538 | /* NAT cache management */ |
539 | struct radix_tree_root nat_root;/* root of the nat entry cache */ | 539 | struct radix_tree_root nat_root;/* root of the nat entry cache */ |
540 | struct radix_tree_root nat_set_root;/* root of the nat set cache */ | 540 | struct radix_tree_root nat_set_root;/* root of the nat set cache */ |
541 | struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ | 541 | struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ |
542 | struct list_head nat_entries; /* cached nat entry list (clean) */ | 542 | struct list_head nat_entries; /* cached nat entry list (clean) */ |
543 | unsigned int nat_cnt; /* the # of cached nat entries */ | 543 | unsigned int nat_cnt; /* the # of cached nat entries */ |
544 | unsigned int dirty_nat_cnt; /* total num of nat entries in set */ | 544 | unsigned int dirty_nat_cnt; /* total num of nat entries in set */ |
@@ -787,7 +787,7 @@ struct f2fs_sb_info { | |||
787 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ | 787 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ |
788 | struct inode *meta_inode; /* cache meta blocks */ | 788 | struct inode *meta_inode; /* cache meta blocks */ |
789 | struct mutex cp_mutex; /* checkpoint procedure lock */ | 789 | struct mutex cp_mutex; /* checkpoint procedure lock */ |
790 | struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ | 790 | struct rw_semaphore cp_rwsem; /* blocking FS operations */ |
791 | struct rw_semaphore node_write; /* locking node writes */ | 791 | struct rw_semaphore node_write; /* locking node writes */ |
792 | wait_queue_head_t cp_wait; | 792 | wait_queue_head_t cp_wait; |
793 | unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ | 793 | unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ |
@@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |||
1074 | 1074 | ||
1075 | static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) | 1075 | static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) |
1076 | { | 1076 | { |
1077 | percpu_down_read(&sbi->cp_rwsem); | 1077 | down_read(&sbi->cp_rwsem); |
1078 | } | 1078 | } |
1079 | 1079 | ||
1080 | static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) | 1080 | static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) |
1081 | { | 1081 | { |
1082 | percpu_up_read(&sbi->cp_rwsem); | 1082 | up_read(&sbi->cp_rwsem); |
1083 | } | 1083 | } |
1084 | 1084 | ||
1085 | static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) | 1085 | static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) |
1086 | { | 1086 | { |
1087 | percpu_down_write(&sbi->cp_rwsem); | 1087 | down_write(&sbi->cp_rwsem); |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) | 1090 | static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) |
1091 | { | 1091 | { |
1092 | percpu_up_write(&sbi->cp_rwsem); | 1092 | up_write(&sbi->cp_rwsem); |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | static inline int __get_cp_reason(struct f2fs_sb_info *sbi) | 1095 | static inline int __get_cp_reason(struct f2fs_sb_info *sbi) |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e493f63ea41..28f4f4cbb8d8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -1757,21 +1757,14 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) | |||
1757 | { | 1757 | { |
1758 | struct fscrypt_policy policy; | 1758 | struct fscrypt_policy policy; |
1759 | struct inode *inode = file_inode(filp); | 1759 | struct inode *inode = file_inode(filp); |
1760 | int ret; | ||
1761 | 1760 | ||
1762 | if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, | 1761 | if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, |
1763 | sizeof(policy))) | 1762 | sizeof(policy))) |
1764 | return -EFAULT; | 1763 | return -EFAULT; |
1765 | 1764 | ||
1766 | ret = mnt_want_write_file(filp); | ||
1767 | if (ret) | ||
1768 | return ret; | ||
1769 | |||
1770 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | 1765 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); |
1771 | ret = fscrypt_process_policy(inode, &policy); | ||
1772 | 1766 | ||
1773 | mnt_drop_write_file(filp); | 1767 | return fscrypt_process_policy(filp, &policy); |
1774 | return ret; | ||
1775 | } | 1768 | } |
1776 | 1769 | ||
1777 | static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) | 1770 | static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) |
@@ -2086,15 +2079,19 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, | |||
2086 | if (unlikely(f2fs_readonly(src->i_sb))) | 2079 | if (unlikely(f2fs_readonly(src->i_sb))) |
2087 | return -EROFS; | 2080 | return -EROFS; |
2088 | 2081 | ||
2089 | if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) | 2082 | if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) |
2090 | return -EISDIR; | 2083 | return -EINVAL; |
2091 | 2084 | ||
2092 | if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) | 2085 | if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) |
2093 | return -EOPNOTSUPP; | 2086 | return -EOPNOTSUPP; |
2094 | 2087 | ||
2095 | inode_lock(src); | 2088 | inode_lock(src); |
2096 | if (src != dst) | 2089 | if (src != dst) { |
2097 | inode_lock(dst); | 2090 | if (!inode_trylock(dst)) { |
2091 | ret = -EBUSY; | ||
2092 | goto out; | ||
2093 | } | ||
2094 | } | ||
2098 | 2095 | ||
2099 | ret = -EINVAL; | 2096 | ret = -EINVAL; |
2100 | if (pos_in + len > src->i_size || pos_in + len < pos_in) | 2097 | if (pos_in + len > src->i_size || pos_in + len < pos_in) |
@@ -2152,6 +2149,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, | |||
2152 | out_unlock: | 2149 | out_unlock: |
2153 | if (src != dst) | 2150 | if (src != dst) |
2154 | inode_unlock(dst); | 2151 | inode_unlock(dst); |
2152 | out: | ||
2155 | inode_unlock(src); | 2153 | inode_unlock(src); |
2156 | return ret; | 2154 | return ret; |
2157 | } | 2155 | } |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b2fa4b615925..f75d197d5beb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) | |||
206 | struct nat_entry *e; | 206 | struct nat_entry *e; |
207 | bool need = false; | 207 | bool need = false; |
208 | 208 | ||
209 | percpu_down_read(&nm_i->nat_tree_lock); | 209 | down_read(&nm_i->nat_tree_lock); |
210 | e = __lookup_nat_cache(nm_i, nid); | 210 | e = __lookup_nat_cache(nm_i, nid); |
211 | if (e) { | 211 | if (e) { |
212 | if (!get_nat_flag(e, IS_CHECKPOINTED) && | 212 | if (!get_nat_flag(e, IS_CHECKPOINTED) && |
213 | !get_nat_flag(e, HAS_FSYNCED_INODE)) | 213 | !get_nat_flag(e, HAS_FSYNCED_INODE)) |
214 | need = true; | 214 | need = true; |
215 | } | 215 | } |
216 | percpu_up_read(&nm_i->nat_tree_lock); | 216 | up_read(&nm_i->nat_tree_lock); |
217 | return need; | 217 | return need; |
218 | } | 218 | } |
219 | 219 | ||
@@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | |||
223 | struct nat_entry *e; | 223 | struct nat_entry *e; |
224 | bool is_cp = true; | 224 | bool is_cp = true; |
225 | 225 | ||
226 | percpu_down_read(&nm_i->nat_tree_lock); | 226 | down_read(&nm_i->nat_tree_lock); |
227 | e = __lookup_nat_cache(nm_i, nid); | 227 | e = __lookup_nat_cache(nm_i, nid); |
228 | if (e && !get_nat_flag(e, IS_CHECKPOINTED)) | 228 | if (e && !get_nat_flag(e, IS_CHECKPOINTED)) |
229 | is_cp = false; | 229 | is_cp = false; |
230 | percpu_up_read(&nm_i->nat_tree_lock); | 230 | up_read(&nm_i->nat_tree_lock); |
231 | return is_cp; | 231 | return is_cp; |
232 | } | 232 | } |
233 | 233 | ||
@@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) | |||
237 | struct nat_entry *e; | 237 | struct nat_entry *e; |
238 | bool need_update = true; | 238 | bool need_update = true; |
239 | 239 | ||
240 | percpu_down_read(&nm_i->nat_tree_lock); | 240 | down_read(&nm_i->nat_tree_lock); |
241 | e = __lookup_nat_cache(nm_i, ino); | 241 | e = __lookup_nat_cache(nm_i, ino); |
242 | if (e && get_nat_flag(e, HAS_LAST_FSYNC) && | 242 | if (e && get_nat_flag(e, HAS_LAST_FSYNC) && |
243 | (get_nat_flag(e, IS_CHECKPOINTED) || | 243 | (get_nat_flag(e, IS_CHECKPOINTED) || |
244 | get_nat_flag(e, HAS_FSYNCED_INODE))) | 244 | get_nat_flag(e, HAS_FSYNCED_INODE))) |
245 | need_update = false; | 245 | need_update = false; |
246 | percpu_up_read(&nm_i->nat_tree_lock); | 246 | up_read(&nm_i->nat_tree_lock); |
247 | return need_update; | 247 | return need_update; |
248 | } | 248 | } |
249 | 249 | ||
@@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
284 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 284 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
285 | struct nat_entry *e; | 285 | struct nat_entry *e; |
286 | 286 | ||
287 | percpu_down_write(&nm_i->nat_tree_lock); | 287 | down_write(&nm_i->nat_tree_lock); |
288 | e = __lookup_nat_cache(nm_i, ni->nid); | 288 | e = __lookup_nat_cache(nm_i, ni->nid); |
289 | if (!e) { | 289 | if (!e) { |
290 | e = grab_nat_entry(nm_i, ni->nid); | 290 | e = grab_nat_entry(nm_i, ni->nid); |
@@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
334 | set_nat_flag(e, HAS_FSYNCED_INODE, true); | 334 | set_nat_flag(e, HAS_FSYNCED_INODE, true); |
335 | set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); | 335 | set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); |
336 | } | 336 | } |
337 | percpu_up_write(&nm_i->nat_tree_lock); | 337 | up_write(&nm_i->nat_tree_lock); |
338 | } | 338 | } |
339 | 339 | ||
340 | int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | 340 | int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) |
@@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |||
342 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 342 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
343 | int nr = nr_shrink; | 343 | int nr = nr_shrink; |
344 | 344 | ||
345 | percpu_down_write(&nm_i->nat_tree_lock); | 345 | if (!down_write_trylock(&nm_i->nat_tree_lock)) |
346 | return 0; | ||
346 | 347 | ||
347 | while (nr_shrink && !list_empty(&nm_i->nat_entries)) { | 348 | while (nr_shrink && !list_empty(&nm_i->nat_entries)) { |
348 | struct nat_entry *ne; | 349 | struct nat_entry *ne; |
@@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |||
351 | __del_from_nat_cache(nm_i, ne); | 352 | __del_from_nat_cache(nm_i, ne); |
352 | nr_shrink--; | 353 | nr_shrink--; |
353 | } | 354 | } |
354 | percpu_up_write(&nm_i->nat_tree_lock); | 355 | up_write(&nm_i->nat_tree_lock); |
355 | return nr - nr_shrink; | 356 | return nr - nr_shrink; |
356 | } | 357 | } |
357 | 358 | ||
@@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | |||
373 | ni->nid = nid; | 374 | ni->nid = nid; |
374 | 375 | ||
375 | /* Check nat cache */ | 376 | /* Check nat cache */ |
376 | percpu_down_read(&nm_i->nat_tree_lock); | 377 | down_read(&nm_i->nat_tree_lock); |
377 | e = __lookup_nat_cache(nm_i, nid); | 378 | e = __lookup_nat_cache(nm_i, nid); |
378 | if (e) { | 379 | if (e) { |
379 | ni->ino = nat_get_ino(e); | 380 | ni->ino = nat_get_ino(e); |
380 | ni->blk_addr = nat_get_blkaddr(e); | 381 | ni->blk_addr = nat_get_blkaddr(e); |
381 | ni->version = nat_get_version(e); | 382 | ni->version = nat_get_version(e); |
382 | percpu_up_read(&nm_i->nat_tree_lock); | 383 | up_read(&nm_i->nat_tree_lock); |
383 | return; | 384 | return; |
384 | } | 385 | } |
385 | 386 | ||
@@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | |||
403 | node_info_from_raw_nat(ni, &ne); | 404 | node_info_from_raw_nat(ni, &ne); |
404 | f2fs_put_page(page, 1); | 405 | f2fs_put_page(page, 1); |
405 | cache: | 406 | cache: |
406 | percpu_up_read(&nm_i->nat_tree_lock); | 407 | up_read(&nm_i->nat_tree_lock); |
407 | /* cache nat entry */ | 408 | /* cache nat entry */ |
408 | percpu_down_write(&nm_i->nat_tree_lock); | 409 | down_write(&nm_i->nat_tree_lock); |
409 | cache_nat_entry(sbi, nid, &ne); | 410 | cache_nat_entry(sbi, nid, &ne); |
410 | percpu_up_write(&nm_i->nat_tree_lock); | 411 | up_write(&nm_i->nat_tree_lock); |
411 | } | 412 | } |
412 | 413 | ||
413 | /* | 414 | /* |
@@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) | |||
1788 | ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, | 1789 | ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, |
1789 | META_NAT, true); | 1790 | META_NAT, true); |
1790 | 1791 | ||
1791 | percpu_down_read(&nm_i->nat_tree_lock); | 1792 | down_read(&nm_i->nat_tree_lock); |
1792 | 1793 | ||
1793 | while (1) { | 1794 | while (1) { |
1794 | struct page *page = get_current_nat_page(sbi, nid); | 1795 | struct page *page = get_current_nat_page(sbi, nid); |
@@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) | |||
1820 | remove_free_nid(nm_i, nid); | 1821 | remove_free_nid(nm_i, nid); |
1821 | } | 1822 | } |
1822 | up_read(&curseg->journal_rwsem); | 1823 | up_read(&curseg->journal_rwsem); |
1823 | percpu_up_read(&nm_i->nat_tree_lock); | 1824 | up_read(&nm_i->nat_tree_lock); |
1824 | 1825 | ||
1825 | ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), | 1826 | ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), |
1826 | nm_i->ra_nid_pages, META_NAT, false); | 1827 | nm_i->ra_nid_pages, META_NAT, false); |
@@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
2209 | if (!nm_i->dirty_nat_cnt) | 2210 | if (!nm_i->dirty_nat_cnt) |
2210 | return; | 2211 | return; |
2211 | 2212 | ||
2212 | percpu_down_write(&nm_i->nat_tree_lock); | 2213 | down_write(&nm_i->nat_tree_lock); |
2213 | 2214 | ||
2214 | /* | 2215 | /* |
2215 | * if there are no enough space in journal to store dirty nat | 2216 | * if there are no enough space in journal to store dirty nat |
@@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
2232 | list_for_each_entry_safe(set, tmp, &sets, set_list) | 2233 | list_for_each_entry_safe(set, tmp, &sets, set_list) |
2233 | __flush_nat_entry_set(sbi, set); | 2234 | __flush_nat_entry_set(sbi, set); |
2234 | 2235 | ||
2235 | percpu_up_write(&nm_i->nat_tree_lock); | 2236 | up_write(&nm_i->nat_tree_lock); |
2236 | 2237 | ||
2237 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); | 2238 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); |
2238 | } | 2239 | } |
@@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
2268 | 2269 | ||
2269 | mutex_init(&nm_i->build_lock); | 2270 | mutex_init(&nm_i->build_lock); |
2270 | spin_lock_init(&nm_i->free_nid_list_lock); | 2271 | spin_lock_init(&nm_i->free_nid_list_lock); |
2271 | if (percpu_init_rwsem(&nm_i->nat_tree_lock)) | 2272 | init_rwsem(&nm_i->nat_tree_lock); |
2272 | return -ENOMEM; | ||
2273 | 2273 | ||
2274 | nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); | 2274 | nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); |
2275 | nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); | 2275 | nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); |
@@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2326 | spin_unlock(&nm_i->free_nid_list_lock); | 2326 | spin_unlock(&nm_i->free_nid_list_lock); |
2327 | 2327 | ||
2328 | /* destroy nat cache */ | 2328 | /* destroy nat cache */ |
2329 | percpu_down_write(&nm_i->nat_tree_lock); | 2329 | down_write(&nm_i->nat_tree_lock); |
2330 | while ((found = __gang_lookup_nat_cache(nm_i, | 2330 | while ((found = __gang_lookup_nat_cache(nm_i, |
2331 | nid, NATVEC_SIZE, natvec))) { | 2331 | nid, NATVEC_SIZE, natvec))) { |
2332 | unsigned idx; | 2332 | unsigned idx; |
@@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2351 | kmem_cache_free(nat_entry_set_slab, setvec[idx]); | 2351 | kmem_cache_free(nat_entry_set_slab, setvec[idx]); |
2352 | } | 2352 | } |
2353 | } | 2353 | } |
2354 | percpu_up_write(&nm_i->nat_tree_lock); | 2354 | up_write(&nm_i->nat_tree_lock); |
2355 | 2355 | ||
2356 | percpu_free_rwsem(&nm_i->nat_tree_lock); | ||
2357 | kfree(nm_i->nat_bitmap); | 2356 | kfree(nm_i->nat_bitmap); |
2358 | sbi->nm_info = NULL; | 2357 | sbi->nm_info = NULL; |
2359 | kfree(nm_i); | 2358 | kfree(nm_i); |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b86d3f638ef..7f863a645ab1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) | |||
706 | percpu_counter_destroy(&sbi->nr_pages[i]); | 706 | percpu_counter_destroy(&sbi->nr_pages[i]); |
707 | percpu_counter_destroy(&sbi->alloc_valid_block_count); | 707 | percpu_counter_destroy(&sbi->alloc_valid_block_count); |
708 | percpu_counter_destroy(&sbi->total_valid_inode_count); | 708 | percpu_counter_destroy(&sbi->total_valid_inode_count); |
709 | |||
710 | percpu_free_rwsem(&sbi->cp_rwsem); | ||
711 | } | 709 | } |
712 | 710 | ||
713 | static void f2fs_put_super(struct super_block *sb) | 711 | static void f2fs_put_super(struct super_block *sb) |
@@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) | |||
1483 | { | 1481 | { |
1484 | int i, err; | 1482 | int i, err; |
1485 | 1483 | ||
1486 | if (percpu_init_rwsem(&sbi->cp_rwsem)) | ||
1487 | return -ENOMEM; | ||
1488 | |||
1489 | for (i = 0; i < NR_COUNT_TYPE; i++) { | 1484 | for (i = 0; i < NR_COUNT_TYPE; i++) { |
1490 | err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); | 1485 | err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); |
1491 | if (err) | 1486 | if (err) |
@@ -1686,6 +1681,7 @@ try_onemore: | |||
1686 | sbi->write_io[i].bio = NULL; | 1681 | sbi->write_io[i].bio = NULL; |
1687 | } | 1682 | } |
1688 | 1683 | ||
1684 | init_rwsem(&sbi->cp_rwsem); | ||
1689 | init_waitqueue_head(&sbi->cp_wait); | 1685 | init_waitqueue_head(&sbi->cp_wait); |
1690 | init_sb_info(sbi); | 1686 | init_sb_info(sbi); |
1691 | 1687 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4d09d4441e3e..05713a5da083 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -1949,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) | |||
1949 | { | 1949 | { |
1950 | struct backing_dev_info *bdi; | 1950 | struct backing_dev_info *bdi; |
1951 | 1951 | ||
1952 | /* | ||
1953 | * If we are expecting writeback progress we must submit plugged IO. | ||
1954 | */ | ||
1955 | if (blk_needs_flush_plug(current)) | ||
1956 | blk_schedule_flush_plug(current); | ||
1957 | |||
1952 | if (!nr_pages) | 1958 | if (!nr_pages) |
1953 | nr_pages = get_nr_dirty_pages(); | 1959 | nr_pages = get_nr_dirty_pages(); |
1954 | 1960 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f394aff59c36..3988b43c2f5a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -530,13 +530,13 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, | |||
530 | req->out.args[0].size = count; | 530 | req->out.args[0].size = count; |
531 | } | 531 | } |
532 | 532 | ||
533 | static void fuse_release_user_pages(struct fuse_req *req, int write) | 533 | static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty) |
534 | { | 534 | { |
535 | unsigned i; | 535 | unsigned i; |
536 | 536 | ||
537 | for (i = 0; i < req->num_pages; i++) { | 537 | for (i = 0; i < req->num_pages; i++) { |
538 | struct page *page = req->pages[i]; | 538 | struct page *page = req->pages[i]; |
539 | if (write) | 539 | if (should_dirty) |
540 | set_page_dirty_lock(page); | 540 | set_page_dirty_lock(page); |
541 | put_page(page); | 541 | put_page(page); |
542 | } | 542 | } |
@@ -1320,6 +1320,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, | |||
1320 | loff_t *ppos, int flags) | 1320 | loff_t *ppos, int flags) |
1321 | { | 1321 | { |
1322 | int write = flags & FUSE_DIO_WRITE; | 1322 | int write = flags & FUSE_DIO_WRITE; |
1323 | bool should_dirty = !write && iter_is_iovec(iter); | ||
1323 | int cuse = flags & FUSE_DIO_CUSE; | 1324 | int cuse = flags & FUSE_DIO_CUSE; |
1324 | struct file *file = io->file; | 1325 | struct file *file = io->file; |
1325 | struct inode *inode = file->f_mapping->host; | 1326 | struct inode *inode = file->f_mapping->host; |
@@ -1363,7 +1364,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, | |||
1363 | nres = fuse_send_read(req, io, pos, nbytes, owner); | 1364 | nres = fuse_send_read(req, io, pos, nbytes, owner); |
1364 | 1365 | ||
1365 | if (!io->async) | 1366 | if (!io->async) |
1366 | fuse_release_user_pages(req, !write); | 1367 | fuse_release_user_pages(req, should_dirty); |
1367 | if (req->out.h.error) { | 1368 | if (req->out.h.error) { |
1368 | err = req->out.h.error; | 1369 | err = req->out.h.error; |
1369 | break; | 1370 | break; |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 0f56deb24ce6..c415668c86d4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp) | |||
568 | return thaw_super(sb); | 568 | return thaw_super(sb); |
569 | } | 569 | } |
570 | 570 | ||
571 | static long ioctl_file_dedupe_range(struct file *file, void __user *arg) | 571 | static int ioctl_file_dedupe_range(struct file *file, void __user *arg) |
572 | { | 572 | { |
573 | struct file_dedupe_range __user *argp = arg; | 573 | struct file_dedupe_range __user *argp = arg; |
574 | struct file_dedupe_range *same = NULL; | 574 | struct file_dedupe_range *same = NULL; |
@@ -582,6 +582,10 @@ static long ioctl_file_dedupe_range(struct file *file, void __user *arg) | |||
582 | } | 582 | } |
583 | 583 | ||
584 | size = offsetof(struct file_dedupe_range __user, info[count]); | 584 | size = offsetof(struct file_dedupe_range __user, info[count]); |
585 | if (size > PAGE_SIZE) { | ||
586 | ret = -ENOMEM; | ||
587 | goto out; | ||
588 | } | ||
585 | 589 | ||
586 | same = memdup_user(argp, size); | 590 | same = memdup_user(argp, size); |
587 | if (IS_ERR(same)) { | 591 | if (IS_ERR(same)) { |
diff --git a/fs/iomap.c b/fs/iomap.c index 48141b8eff5f..706270f21b35 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
@@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, | |||
84 | * Now the data has been copied, commit the range we've copied. This | 84 | * Now the data has been copied, commit the range we've copied. This |
85 | * should not fail unless the filesystem has had a fatal error. | 85 | * should not fail unless the filesystem has had a fatal error. |
86 | */ | 86 | */ |
87 | ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0, | 87 | if (ops->iomap_end) { |
88 | flags, &iomap); | 88 | ret = ops->iomap_end(inode, pos, length, |
89 | written > 0 ? written : 0, | ||
90 | flags, &iomap); | ||
91 | } | ||
89 | 92 | ||
90 | return written ? written : ret; | 93 | return written ? written : ret; |
91 | } | 94 | } |
@@ -194,12 +197,9 @@ again: | |||
194 | if (mapping_writably_mapped(inode->i_mapping)) | 197 | if (mapping_writably_mapped(inode->i_mapping)) |
195 | flush_dcache_page(page); | 198 | flush_dcache_page(page); |
196 | 199 | ||
197 | pagefault_disable(); | ||
198 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | 200 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); |
199 | pagefault_enable(); | ||
200 | 201 | ||
201 | flush_dcache_page(page); | 202 | flush_dcache_page(page); |
202 | mark_page_accessed(page); | ||
203 | 203 | ||
204 | status = iomap_write_end(inode, pos, bytes, copied, page); | 204 | status = iomap_write_end(inode, pos, bytes, copied, page); |
205 | if (unlikely(status < 0)) | 205 | if (unlikely(status < 0)) |
@@ -428,9 +428,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, | |||
428 | break; | 428 | break; |
429 | } | 429 | } |
430 | 430 | ||
431 | if (iomap->flags & IOMAP_F_MERGED) | ||
432 | flags |= FIEMAP_EXTENT_MERGED; | ||
433 | |||
431 | return fiemap_fill_next_extent(fi, iomap->offset, | 434 | return fiemap_fill_next_extent(fi, iomap->offset, |
432 | iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, | 435 | iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, |
433 | iomap->length, flags | FIEMAP_EXTENT_MERGED); | 436 | iomap->length, flags); |
434 | 437 | ||
435 | } | 438 | } |
436 | 439 | ||
@@ -470,13 +473,18 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, | |||
470 | if (ret) | 473 | if (ret) |
471 | return ret; | 474 | return ret; |
472 | 475 | ||
473 | ret = filemap_write_and_wait(inode->i_mapping); | 476 | if (fi->fi_flags & FIEMAP_FLAG_SYNC) { |
474 | if (ret) | 477 | ret = filemap_write_and_wait(inode->i_mapping); |
475 | return ret; | 478 | if (ret) |
479 | return ret; | ||
480 | } | ||
476 | 481 | ||
477 | while (len > 0) { | 482 | while (len > 0) { |
478 | ret = iomap_apply(inode, start, len, 0, ops, &ctx, | 483 | ret = iomap_apply(inode, start, len, 0, ops, &ctx, |
479 | iomap_fiemap_actor); | 484 | iomap_fiemap_actor); |
485 | /* inode with no (attribute) mapping will give ENOENT */ | ||
486 | if (ret == -ENOENT) | ||
487 | break; | ||
480 | if (ret < 0) | 488 | if (ret < 0) |
481 | return ret; | 489 | return ret; |
482 | if (ret == 0) | 490 | if (ret == 0) |
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e1574008adc9..2bcb86e6e6ca 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
@@ -840,21 +840,35 @@ repeat: | |||
840 | mutex_lock(&kernfs_mutex); | 840 | mutex_lock(&kernfs_mutex); |
841 | 841 | ||
842 | list_for_each_entry(info, &kernfs_root(kn)->supers, node) { | 842 | list_for_each_entry(info, &kernfs_root(kn)->supers, node) { |
843 | struct kernfs_node *parent; | ||
843 | struct inode *inode; | 844 | struct inode *inode; |
844 | struct dentry *dentry; | ||
845 | 845 | ||
846 | /* | ||
847 | * We want fsnotify_modify() on @kn but as the | ||
848 | * modifications aren't originating from userland don't | ||
849 | * have the matching @file available. Look up the inodes | ||
850 | * and generate the events manually. | ||
851 | */ | ||
846 | inode = ilookup(info->sb, kn->ino); | 852 | inode = ilookup(info->sb, kn->ino); |
847 | if (!inode) | 853 | if (!inode) |
848 | continue; | 854 | continue; |
849 | 855 | ||
850 | dentry = d_find_any_alias(inode); | 856 | parent = kernfs_get_parent(kn); |
851 | if (dentry) { | 857 | if (parent) { |
852 | fsnotify_parent(NULL, dentry, FS_MODIFY); | 858 | struct inode *p_inode; |
853 | fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, | 859 | |
854 | NULL, 0); | 860 | p_inode = ilookup(info->sb, parent->ino); |
855 | dput(dentry); | 861 | if (p_inode) { |
862 | fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD, | ||
863 | inode, FSNOTIFY_EVENT_INODE, kn->name, 0); | ||
864 | iput(p_inode); | ||
865 | } | ||
866 | |||
867 | kernfs_put(parent); | ||
856 | } | 868 | } |
857 | 869 | ||
870 | fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, | ||
871 | kn->name, 0); | ||
858 | iput(inode); | 872 | iput(inode); |
859 | } | 873 | } |
860 | 874 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index f55a4e756047..217847679f0e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work) | |||
346 | PAGE_SIZE - 1) & (loff_t)PAGE_MASK; | 346 | PAGE_SIZE - 1) & (loff_t)PAGE_MASK; |
347 | 347 | ||
348 | ext_tree_mark_written(bl, start >> SECTOR_SHIFT, | 348 | ext_tree_mark_written(bl, start >> SECTOR_SHIFT, |
349 | (end - start) >> SECTOR_SHIFT); | 349 | (end - start) >> SECTOR_SHIFT, end); |
350 | } | 350 | } |
351 | 351 | ||
352 | pnfs_ld_write_done(hdr); | 352 | pnfs_ld_write_done(hdr); |
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 18e6fd0b9506..efc007f00742 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -141,6 +141,7 @@ struct pnfs_block_layout { | |||
141 | struct rb_root bl_ext_ro; | 141 | struct rb_root bl_ext_ro; |
142 | spinlock_t bl_ext_lock; /* Protects list manipulation */ | 142 | spinlock_t bl_ext_lock; /* Protects list manipulation */ |
143 | bool bl_scsi_layout; | 143 | bool bl_scsi_layout; |
144 | u64 bl_lwb; | ||
144 | }; | 145 | }; |
145 | 146 | ||
146 | static inline struct pnfs_block_layout * | 147 | static inline struct pnfs_block_layout * |
@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl, | |||
182 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, | 183 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, |
183 | sector_t end); | 184 | sector_t end); |
184 | int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | 185 | int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, |
185 | sector_t len); | 186 | sector_t len, u64 lwb); |
186 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, | 187 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, |
187 | struct pnfs_block_extent *ret, bool rw); | 188 | struct pnfs_block_extent *ret, bool rw); |
188 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); | 189 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); |
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 992bcb19c11e..c85fbfd2d0d9 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, | |||
402 | 402 | ||
403 | int | 403 | int |
404 | ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | 404 | ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, |
405 | sector_t len) | 405 | sector_t len, u64 lwb) |
406 | { | 406 | { |
407 | struct rb_root *root = &bl->bl_ext_rw; | 407 | struct rb_root *root = &bl->bl_ext_rw; |
408 | sector_t end = start + len; | 408 | sector_t end = start + len; |
@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | |||
471 | } | 471 | } |
472 | } | 472 | } |
473 | out: | 473 | out: |
474 | if (bl->bl_lwb < lwb) | ||
475 | bl->bl_lwb = lwb; | ||
474 | spin_unlock(&bl->bl_ext_lock); | 476 | spin_unlock(&bl->bl_ext_lock); |
475 | 477 | ||
476 | __ext_put_deviceids(&tmp); | 478 | __ext_put_deviceids(&tmp); |
@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) | |||
518 | } | 520 | } |
519 | 521 | ||
520 | static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, | 522 | static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, |
521 | size_t buffer_size, size_t *count) | 523 | size_t buffer_size, size_t *count, __u64 *lastbyte) |
522 | { | 524 | { |
523 | struct pnfs_block_extent *be; | 525 | struct pnfs_block_extent *be; |
524 | int ret = 0; | 526 | int ret = 0; |
@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, | |||
542 | p = encode_block_extent(be, p); | 544 | p = encode_block_extent(be, p); |
543 | be->be_tag = EXTENT_COMMITTING; | 545 | be->be_tag = EXTENT_COMMITTING; |
544 | } | 546 | } |
547 | *lastbyte = bl->bl_lwb - 1; | ||
548 | bl->bl_lwb = 0; | ||
545 | spin_unlock(&bl->bl_ext_lock); | 549 | spin_unlock(&bl->bl_ext_lock); |
546 | 550 | ||
547 | return ret; | 551 | return ret; |
@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) | |||
564 | arg->layoutupdate_pages = &arg->layoutupdate_page; | 568 | arg->layoutupdate_pages = &arg->layoutupdate_page; |
565 | 569 | ||
566 | retry: | 570 | retry: |
567 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); | 571 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); |
568 | if (unlikely(ret)) { | 572 | if (unlikely(ret)) { |
569 | ext_tree_free_commitdata(arg, buffer_size); | 573 | ext_tree_free_commitdata(arg, buffer_size); |
570 | 574 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a7f2e6e33305..52a28311e2a4 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, | |||
275 | err_socks: | 275 | err_socks: |
276 | svc_rpcb_cleanup(serv, net); | 276 | svc_rpcb_cleanup(serv, net); |
277 | err_bind: | 277 | err_bind: |
278 | nn->cb_users[minorversion]--; | ||
278 | dprintk("NFS: Couldn't create callback socket: err = %d; " | 279 | dprintk("NFS: Couldn't create callback socket: err = %d; " |
279 | "net = %p\n", ret, net); | 280 | "net = %p\n", ret, net); |
280 | return ret; | 281 | return ret; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c92a75e066a6..f953ef6b2f2e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp, | |||
454 | ((u32 *)&rclist->rcl_sessionid.data)[3], | 454 | ((u32 *)&rclist->rcl_sessionid.data)[3], |
455 | ref->rc_sequenceid, ref->rc_slotid); | 455 | ref->rc_sequenceid, ref->rc_slotid); |
456 | 456 | ||
457 | spin_lock(&tbl->slot_tbl_lock); | 457 | status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, |
458 | status = (test_bit(ref->rc_slotid, tbl->used_slots) && | 458 | ref->rc_sequenceid, HZ >> 1) < 0; |
459 | tbl->slots[ref->rc_slotid].seq_nr == | ||
460 | ref->rc_sequenceid); | ||
461 | spin_unlock(&tbl->slot_tbl_lock); | ||
462 | if (status) | 459 | if (status) |
463 | goto out; | 460 | goto out; |
464 | } | 461 | } |
@@ -487,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
487 | goto out; | 484 | goto out; |
488 | 485 | ||
489 | tbl = &clp->cl_session->bc_slot_table; | 486 | tbl = &clp->cl_session->bc_slot_table; |
490 | slot = tbl->slots + args->csa_slotid; | ||
491 | 487 | ||
492 | /* Set up res before grabbing the spinlock */ | 488 | /* Set up res before grabbing the spinlock */ |
493 | memcpy(&res->csr_sessionid, &args->csa_sessionid, | 489 | memcpy(&res->csr_sessionid, &args->csa_sessionid, |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 003ebce4bbc4..1e106780a237 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready); | |||
426 | * Initialise the timeout values for a connection | 426 | * Initialise the timeout values for a connection |
427 | */ | 427 | */ |
428 | void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | 428 | void nfs_init_timeout_values(struct rpc_timeout *to, int proto, |
429 | unsigned int timeo, unsigned int retrans) | 429 | int timeo, int retrans) |
430 | { | 430 | { |
431 | to->to_initval = timeo * HZ / 10; | 431 | to->to_initval = timeo * HZ / 10; |
432 | to->to_retries = retrans; | 432 | to->to_retries = retrans; |
@@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
434 | switch (proto) { | 434 | switch (proto) { |
435 | case XPRT_TRANSPORT_TCP: | 435 | case XPRT_TRANSPORT_TCP: |
436 | case XPRT_TRANSPORT_RDMA: | 436 | case XPRT_TRANSPORT_RDMA: |
437 | if (to->to_retries == 0) | 437 | if (retrans == NFS_UNSPEC_RETRANS) |
438 | to->to_retries = NFS_DEF_TCP_RETRANS; | 438 | to->to_retries = NFS_DEF_TCP_RETRANS; |
439 | if (to->to_initval == 0) | 439 | if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) |
440 | to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; | 440 | to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; |
441 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) | 441 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) |
442 | to->to_initval = NFS_MAX_TCP_TIMEOUT; | 442 | to->to_initval = NFS_MAX_TCP_TIMEOUT; |
@@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
449 | to->to_exponential = 0; | 449 | to->to_exponential = 0; |
450 | break; | 450 | break; |
451 | case XPRT_TRANSPORT_UDP: | 451 | case XPRT_TRANSPORT_UDP: |
452 | if (to->to_retries == 0) | 452 | if (retrans == NFS_UNSPEC_RETRANS) |
453 | to->to_retries = NFS_DEF_UDP_RETRANS; | 453 | to->to_retries = NFS_DEF_UDP_RETRANS; |
454 | if (!to->to_initval) | 454 | if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) |
455 | to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; | 455 | to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; |
456 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) | 456 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) |
457 | to->to_initval = NFS_MAX_UDP_TIMEOUT; | 457 | to->to_initval = NFS_MAX_UDP_TIMEOUT; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7d620970f2e1..ca699ddc11c1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) | |||
657 | if (result <= 0) | 657 | if (result <= 0) |
658 | goto out; | 658 | goto out; |
659 | 659 | ||
660 | written = generic_write_sync(iocb, result); | 660 | result = generic_write_sync(iocb, result); |
661 | if (result < 0) | ||
662 | goto out; | ||
663 | written = result; | ||
661 | iocb->ki_pos += written; | 664 | iocb->ki_pos += written; |
662 | 665 | ||
663 | /* Return error values */ | 666 | /* Return error values */ |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index e6206eaf2bdf..51b51369704c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | |||
37 | if (ffl) { | 37 | if (ffl) { |
38 | INIT_LIST_HEAD(&ffl->error_list); | 38 | INIT_LIST_HEAD(&ffl->error_list); |
39 | INIT_LIST_HEAD(&ffl->mirrors); | 39 | INIT_LIST_HEAD(&ffl->mirrors); |
40 | ffl->last_report_time = ktime_get(); | ||
40 | return &ffl->generic_hdr; | 41 | return &ffl->generic_hdr; |
41 | } else | 42 | } else |
42 | return NULL; | 43 | return NULL; |
@@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, | |||
640 | { | 641 | { |
641 | static const ktime_t notime = {0}; | 642 | static const ktime_t notime = {0}; |
642 | s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL; | 643 | s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL; |
644 | struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); | ||
643 | 645 | ||
644 | nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); | 646 | nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); |
645 | if (ktime_equal(mirror->start_time, notime)) | 647 | if (ktime_equal(mirror->start_time, notime)) |
646 | mirror->start_time = now; | 648 | mirror->start_time = now; |
647 | if (ktime_equal(mirror->last_report_time, notime)) | ||
648 | mirror->last_report_time = now; | ||
649 | if (mirror->report_interval != 0) | 649 | if (mirror->report_interval != 0) |
650 | report_interval = (s64)mirror->report_interval * 1000LL; | 650 | report_interval = (s64)mirror->report_interval * 1000LL; |
651 | else if (layoutstats_timer != 0) | 651 | else if (layoutstats_timer != 0) |
652 | report_interval = (s64)layoutstats_timer * 1000LL; | 652 | report_interval = (s64)layoutstats_timer * 1000LL; |
653 | if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= | 653 | if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >= |
654 | report_interval) { | 654 | report_interval) { |
655 | mirror->last_report_time = now; | 655 | ffl->last_report_time = now; |
656 | return true; | 656 | return true; |
657 | } | 657 | } |
658 | 658 | ||
@@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, | |||
806 | { | 806 | { |
807 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); | 807 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); |
808 | struct nfs4_pnfs_ds *ds; | 808 | struct nfs4_pnfs_ds *ds; |
809 | bool fail_return = false; | ||
809 | int idx; | 810 | int idx; |
810 | 811 | ||
811 | /* mirrors are sorted by efficiency */ | 812 | /* mirrors are sorted by efficiency */ |
812 | for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { | 813 | for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { |
813 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); | 814 | if (idx+1 == fls->mirror_array_cnt) |
815 | fail_return = true; | ||
816 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return); | ||
814 | if (ds) { | 817 | if (ds) { |
815 | *best_idx = idx; | 818 | *best_idx = idx; |
816 | return ds; | 819 | return ds; |
@@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, | |||
859 | struct nfs4_pnfs_ds *ds; | 862 | struct nfs4_pnfs_ds *ds; |
860 | int ds_idx; | 863 | int ds_idx; |
861 | 864 | ||
865 | retry: | ||
862 | /* Use full layout for now */ | 866 | /* Use full layout for now */ |
863 | if (!pgio->pg_lseg) | 867 | if (!pgio->pg_lseg) |
864 | ff_layout_pg_get_read(pgio, req, false); | 868 | ff_layout_pg_get_read(pgio, req, false); |
@@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, | |||
871 | 875 | ||
872 | ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); | 876 | ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); |
873 | if (!ds) { | 877 | if (!ds) { |
874 | if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) | 878 | if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) |
875 | goto out_pnfs; | ||
876 | else | ||
877 | goto out_mds; | 879 | goto out_mds; |
880 | pnfs_put_lseg(pgio->pg_lseg); | ||
881 | pgio->pg_lseg = NULL; | ||
882 | /* Sleep for 1 second before retrying */ | ||
883 | ssleep(1); | ||
884 | goto retry; | ||
878 | } | 885 | } |
879 | 886 | ||
880 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); | 887 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); |
@@ -890,12 +897,6 @@ out_mds: | |||
890 | pnfs_put_lseg(pgio->pg_lseg); | 897 | pnfs_put_lseg(pgio->pg_lseg); |
891 | pgio->pg_lseg = NULL; | 898 | pgio->pg_lseg = NULL; |
892 | nfs_pageio_reset_read_mds(pgio); | 899 | nfs_pageio_reset_read_mds(pgio); |
893 | return; | ||
894 | |||
895 | out_pnfs: | ||
896 | pnfs_set_lo_fail(pgio->pg_lseg); | ||
897 | pnfs_put_lseg(pgio->pg_lseg); | ||
898 | pgio->pg_lseg = NULL; | ||
899 | } | 900 | } |
900 | 901 | ||
901 | static void | 902 | static void |
@@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
909 | int i; | 910 | int i; |
910 | int status; | 911 | int status; |
911 | 912 | ||
913 | retry: | ||
912 | if (!pgio->pg_lseg) { | 914 | if (!pgio->pg_lseg) { |
913 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 915 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
914 | req->wb_context, | 916 | req->wb_context, |
@@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
940 | for (i = 0; i < pgio->pg_mirror_count; i++) { | 942 | for (i = 0; i < pgio->pg_mirror_count; i++) { |
941 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); | 943 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); |
942 | if (!ds) { | 944 | if (!ds) { |
943 | if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) | 945 | if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) |
944 | goto out_pnfs; | ||
945 | else | ||
946 | goto out_mds; | 946 | goto out_mds; |
947 | pnfs_put_lseg(pgio->pg_lseg); | ||
948 | pgio->pg_lseg = NULL; | ||
949 | /* Sleep for 1 second before retrying */ | ||
950 | ssleep(1); | ||
951 | goto retry; | ||
947 | } | 952 | } |
948 | pgm = &pgio->pg_mirrors[i]; | 953 | pgm = &pgio->pg_mirrors[i]; |
949 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); | 954 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); |
@@ -956,12 +961,6 @@ out_mds: | |||
956 | pnfs_put_lseg(pgio->pg_lseg); | 961 | pnfs_put_lseg(pgio->pg_lseg); |
957 | pgio->pg_lseg = NULL; | 962 | pgio->pg_lseg = NULL; |
958 | nfs_pageio_reset_write_mds(pgio); | 963 | nfs_pageio_reset_write_mds(pgio); |
959 | return; | ||
960 | |||
961 | out_pnfs: | ||
962 | pnfs_set_lo_fail(pgio->pg_lseg); | ||
963 | pnfs_put_lseg(pgio->pg_lseg); | ||
964 | pgio->pg_lseg = NULL; | ||
965 | } | 964 | } |
966 | 965 | ||
967 | static unsigned int | 966 | static unsigned int |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 1bcdb15d0c41..3ee0c9fcea76 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h | |||
@@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror { | |||
84 | struct nfs4_ff_layoutstat read_stat; | 84 | struct nfs4_ff_layoutstat read_stat; |
85 | struct nfs4_ff_layoutstat write_stat; | 85 | struct nfs4_ff_layoutstat write_stat; |
86 | ktime_t start_time; | 86 | ktime_t start_time; |
87 | ktime_t last_report_time; | ||
88 | u32 report_interval; | 87 | u32 report_interval; |
89 | }; | 88 | }; |
90 | 89 | ||
@@ -101,6 +100,7 @@ struct nfs4_flexfile_layout { | |||
101 | struct pnfs_ds_commit_info commit_info; | 100 | struct pnfs_ds_commit_info commit_info; |
102 | struct list_head mirrors; | 101 | struct list_head mirrors; |
103 | struct list_head error_list; /* nfs4_ff_layout_ds_err */ | 102 | struct list_head error_list; /* nfs4_ff_layout_ds_err */ |
103 | ktime_t last_report_time; /* Layoutstat report times */ | ||
104 | }; | 104 | }; |
105 | 105 | ||
106 | static inline struct nfs4_flexfile_layout * | 106 | static inline struct nfs4_flexfile_layout * |
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 0aa36be71fce..f7a3f6b05369 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c | |||
@@ -17,8 +17,8 @@ | |||
17 | 17 | ||
18 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 18 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
19 | 19 | ||
20 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | 20 | static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; |
21 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | 21 | static unsigned int dataserver_retrans; |
22 | 22 | ||
23 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | 23 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) |
24 | { | 24 | { |
@@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |||
379 | 379 | ||
380 | devid = &mirror->mirror_ds->id_node; | 380 | devid = &mirror->mirror_ds->id_node; |
381 | if (ff_layout_test_devid_unavailable(devid)) | 381 | if (ff_layout_test_devid_unavailable(devid)) |
382 | goto out; | 382 | goto out_fail; |
383 | 383 | ||
384 | ds = mirror->mirror_ds->ds; | 384 | ds = mirror->mirror_ds->ds; |
385 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | 385 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ |
@@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |||
405 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; | 405 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; |
406 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | 406 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) |
407 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; | 407 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; |
408 | } else { | 408 | goto out; |
409 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | ||
410 | mirror, lseg->pls_range.offset, | ||
411 | lseg->pls_range.length, NFS4ERR_NXIO, | ||
412 | OP_ILLEGAL, GFP_NOIO); | ||
413 | if (fail_return || !ff_layout_has_available_ds(lseg)) | ||
414 | pnfs_error_mark_layout_for_return(ino, lseg); | ||
415 | ds = NULL; | ||
416 | } | 409 | } |
410 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | ||
411 | mirror, lseg->pls_range.offset, | ||
412 | lseg->pls_range.length, NFS4ERR_NXIO, | ||
413 | OP_ILLEGAL, GFP_NOIO); | ||
414 | out_fail: | ||
415 | if (fail_return || !ff_layout_has_available_ds(lseg)) | ||
416 | pnfs_error_mark_layout_for_return(ino, lseg); | ||
417 | ds = NULL; | ||
417 | out: | 418 | out: |
418 | return ds; | 419 | return ds; |
419 | } | 420 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7ce5e023c3c3..74935a19e4bf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -58,6 +58,9 @@ struct nfs_clone_mount { | |||
58 | */ | 58 | */ |
59 | #define NFS_UNSPEC_PORT (-1) | 59 | #define NFS_UNSPEC_PORT (-1) |
60 | 60 | ||
61 | #define NFS_UNSPEC_RETRANS (UINT_MAX) | ||
62 | #define NFS_UNSPEC_TIMEO (UINT_MAX) | ||
63 | |||
61 | /* | 64 | /* |
62 | * Maximum number of pages that readdir can use for creating | 65 | * Maximum number of pages that readdir can use for creating |
63 | * a vmapped array of pages. | 66 | * a vmapped array of pages. |
@@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, | |||
156 | int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); | 159 | int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); |
157 | void nfs_server_insert_lists(struct nfs_server *); | 160 | void nfs_server_insert_lists(struct nfs_server *); |
158 | void nfs_server_remove_lists(struct nfs_server *); | 161 | void nfs_server_remove_lists(struct nfs_server *); |
159 | void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); | 162 | void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); |
160 | int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, | 163 | int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, |
161 | rpc_authflavor_t); | 164 | rpc_authflavor_t); |
162 | struct nfs_server *nfs_alloc_server(void); | 165 | struct nfs_server *nfs_alloc_server(void); |
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 33da841a21bb..64b43b4ad9dd 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c | |||
@@ -318,10 +318,22 @@ static void | |||
318 | nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) | 318 | nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) |
319 | { | 319 | { |
320 | struct nfs42_layoutstat_data *data = calldata; | 320 | struct nfs42_layoutstat_data *data = calldata; |
321 | struct nfs_server *server = NFS_SERVER(data->args.inode); | 321 | struct inode *inode = data->inode; |
322 | struct nfs_server *server = NFS_SERVER(inode); | ||
323 | struct pnfs_layout_hdr *lo; | ||
322 | 324 | ||
325 | spin_lock(&inode->i_lock); | ||
326 | lo = NFS_I(inode)->layout; | ||
327 | if (!pnfs_layout_is_valid(lo)) { | ||
328 | spin_unlock(&inode->i_lock); | ||
329 | rpc_exit(task, 0); | ||
330 | return; | ||
331 | } | ||
332 | nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); | ||
333 | spin_unlock(&inode->i_lock); | ||
323 | nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, | 334 | nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, |
324 | &data->res.seq_res, task); | 335 | &data->res.seq_res, task); |
336 | |||
325 | } | 337 | } |
326 | 338 | ||
327 | static void | 339 | static void |
@@ -338,12 +350,14 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) | |||
338 | case 0: | 350 | case 0: |
339 | break; | 351 | break; |
340 | case -NFS4ERR_EXPIRED: | 352 | case -NFS4ERR_EXPIRED: |
353 | case -NFS4ERR_ADMIN_REVOKED: | ||
354 | case -NFS4ERR_DELEG_REVOKED: | ||
341 | case -NFS4ERR_STALE_STATEID: | 355 | case -NFS4ERR_STALE_STATEID: |
342 | case -NFS4ERR_OLD_STATEID: | ||
343 | case -NFS4ERR_BAD_STATEID: | 356 | case -NFS4ERR_BAD_STATEID: |
344 | spin_lock(&inode->i_lock); | 357 | spin_lock(&inode->i_lock); |
345 | lo = NFS_I(inode)->layout; | 358 | lo = NFS_I(inode)->layout; |
346 | if (lo && nfs4_stateid_match(&data->args.stateid, | 359 | if (pnfs_layout_is_valid(lo) && |
360 | nfs4_stateid_match(&data->args.stateid, | ||
347 | &lo->plh_stateid)) { | 361 | &lo->plh_stateid)) { |
348 | LIST_HEAD(head); | 362 | LIST_HEAD(head); |
349 | 363 | ||
@@ -357,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) | |||
357 | } else | 371 | } else |
358 | spin_unlock(&inode->i_lock); | 372 | spin_unlock(&inode->i_lock); |
359 | break; | 373 | break; |
374 | case -NFS4ERR_OLD_STATEID: | ||
375 | spin_lock(&inode->i_lock); | ||
376 | lo = NFS_I(inode)->layout; | ||
377 | if (pnfs_layout_is_valid(lo) && | ||
378 | nfs4_stateid_match_other(&data->args.stateid, | ||
379 | &lo->plh_stateid)) { | ||
380 | /* Do we need to delay before resending? */ | ||
381 | if (!nfs4_stateid_is_newer(&lo->plh_stateid, | ||
382 | &data->args.stateid)) | ||
383 | rpc_delay(task, HZ); | ||
384 | rpc_restart_call_prepare(task); | ||
385 | } | ||
386 | spin_unlock(&inode->i_lock); | ||
387 | break; | ||
360 | case -ENOTSUPP: | 388 | case -ENOTSUPP: |
361 | case -EOPNOTSUPP: | 389 | case -EOPNOTSUPP: |
362 | NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; | 390 | NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; |
363 | default: | ||
364 | break; | ||
365 | } | 391 | } |
366 | 392 | ||
367 | dprintk("%s server returns %d\n", __func__, task->tk_status); | 393 | dprintk("%s server returns %d\n", __func__, task->tk_status); |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 324bfdc21250..9bf64eacba5b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -396,6 +396,10 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *); | |||
396 | extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); | 396 | extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); |
397 | extern void nfs4_kill_renewd(struct nfs_client *); | 397 | extern void nfs4_kill_renewd(struct nfs_client *); |
398 | extern void nfs4_renew_state(struct work_struct *); | 398 | extern void nfs4_renew_state(struct work_struct *); |
399 | extern void nfs4_set_lease_period(struct nfs_client *clp, | ||
400 | unsigned long lease, | ||
401 | unsigned long lastrenewed); | ||
402 | |||
399 | 403 | ||
400 | /* nfs4state.c */ | 404 | /* nfs4state.c */ |
401 | struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); | 405 | struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8d7d08d4f95f..cd3b7cfdde16 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server, | |||
817 | goto error; | 817 | goto error; |
818 | } | 818 | } |
819 | 819 | ||
820 | if (server->nfs_client == clp) { | ||
821 | error = -ELOOP; | ||
822 | goto error; | ||
823 | } | ||
824 | |||
820 | /* | 825 | /* |
821 | * Query for the lease time on clientid setup or renewal | 826 | * Query for the lease time on clientid setup or renewal |
822 | * | 827 | * |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a036e93bdf96..a9dec32ba9ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -634,15 +634,11 @@ out_sleep: | |||
634 | } | 634 | } |
635 | EXPORT_SYMBOL_GPL(nfs40_setup_sequence); | 635 | EXPORT_SYMBOL_GPL(nfs40_setup_sequence); |
636 | 636 | ||
637 | static int nfs40_sequence_done(struct rpc_task *task, | 637 | static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) |
638 | struct nfs4_sequence_res *res) | ||
639 | { | 638 | { |
640 | struct nfs4_slot *slot = res->sr_slot; | 639 | struct nfs4_slot *slot = res->sr_slot; |
641 | struct nfs4_slot_table *tbl; | 640 | struct nfs4_slot_table *tbl; |
642 | 641 | ||
643 | if (slot == NULL) | ||
644 | goto out; | ||
645 | |||
646 | tbl = slot->table; | 642 | tbl = slot->table; |
647 | spin_lock(&tbl->slot_tbl_lock); | 643 | spin_lock(&tbl->slot_tbl_lock); |
648 | if (!nfs41_wake_and_assign_slot(tbl, slot)) | 644 | if (!nfs41_wake_and_assign_slot(tbl, slot)) |
@@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task, | |||
650 | spin_unlock(&tbl->slot_tbl_lock); | 646 | spin_unlock(&tbl->slot_tbl_lock); |
651 | 647 | ||
652 | res->sr_slot = NULL; | 648 | res->sr_slot = NULL; |
653 | out: | 649 | } |
650 | |||
651 | static int nfs40_sequence_done(struct rpc_task *task, | ||
652 | struct nfs4_sequence_res *res) | ||
653 | { | ||
654 | if (res->sr_slot != NULL) | ||
655 | nfs40_sequence_free_slot(res); | ||
654 | return 1; | 656 | return 1; |
655 | } | 657 | } |
656 | 658 | ||
@@ -666,6 +668,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) | |||
666 | tbl = slot->table; | 668 | tbl = slot->table; |
667 | session = tbl->session; | 669 | session = tbl->session; |
668 | 670 | ||
671 | /* Bump the slot sequence number */ | ||
672 | if (slot->seq_done) | ||
673 | slot->seq_nr++; | ||
674 | slot->seq_done = 0; | ||
675 | |||
669 | spin_lock(&tbl->slot_tbl_lock); | 676 | spin_lock(&tbl->slot_tbl_lock); |
670 | /* Be nice to the server: try to ensure that the last transmitted | 677 | /* Be nice to the server: try to ensure that the last transmitted |
671 | * value for highest_user_slotid <= target_highest_slotid | 678 | * value for highest_user_slotid <= target_highest_slotid |
@@ -686,9 +693,12 @@ out_unlock: | |||
686 | res->sr_slot = NULL; | 693 | res->sr_slot = NULL; |
687 | if (send_new_highest_used_slotid) | 694 | if (send_new_highest_used_slotid) |
688 | nfs41_notify_server(session->clp); | 695 | nfs41_notify_server(session->clp); |
696 | if (waitqueue_active(&tbl->slot_waitq)) | ||
697 | wake_up_all(&tbl->slot_waitq); | ||
689 | } | 698 | } |
690 | 699 | ||
691 | int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | 700 | static int nfs41_sequence_process(struct rpc_task *task, |
701 | struct nfs4_sequence_res *res) | ||
692 | { | 702 | { |
693 | struct nfs4_session *session; | 703 | struct nfs4_session *session; |
694 | struct nfs4_slot *slot = res->sr_slot; | 704 | struct nfs4_slot *slot = res->sr_slot; |
@@ -714,7 +724,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | |||
714 | switch (res->sr_status) { | 724 | switch (res->sr_status) { |
715 | case 0: | 725 | case 0: |
716 | /* Update the slot's sequence and clientid lease timer */ | 726 | /* Update the slot's sequence and clientid lease timer */ |
717 | ++slot->seq_nr; | 727 | slot->seq_done = 1; |
718 | clp = session->clp; | 728 | clp = session->clp; |
719 | do_renew_lease(clp, res->sr_timestamp); | 729 | do_renew_lease(clp, res->sr_timestamp); |
720 | /* Check sequence flags */ | 730 | /* Check sequence flags */ |
@@ -769,16 +779,16 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | |||
769 | goto retry_nowait; | 779 | goto retry_nowait; |
770 | default: | 780 | default: |
771 | /* Just update the slot sequence no. */ | 781 | /* Just update the slot sequence no. */ |
772 | ++slot->seq_nr; | 782 | slot->seq_done = 1; |
773 | } | 783 | } |
774 | out: | 784 | out: |
775 | /* The session may be reset by one of the error handlers. */ | 785 | /* The session may be reset by one of the error handlers. */ |
776 | dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); | 786 | dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); |
777 | nfs41_sequence_free_slot(res); | ||
778 | out_noaction: | 787 | out_noaction: |
779 | return ret; | 788 | return ret; |
780 | retry_nowait: | 789 | retry_nowait: |
781 | if (rpc_restart_call_prepare(task)) { | 790 | if (rpc_restart_call_prepare(task)) { |
791 | nfs41_sequence_free_slot(res); | ||
782 | task->tk_status = 0; | 792 | task->tk_status = 0; |
783 | ret = 0; | 793 | ret = 0; |
784 | } | 794 | } |
@@ -789,8 +799,37 @@ out_retry: | |||
789 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 799 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
790 | return 0; | 800 | return 0; |
791 | } | 801 | } |
802 | |||
803 | int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | ||
804 | { | ||
805 | if (!nfs41_sequence_process(task, res)) | ||
806 | return 0; | ||
807 | if (res->sr_slot != NULL) | ||
808 | nfs41_sequence_free_slot(res); | ||
809 | return 1; | ||
810 | |||
811 | } | ||
792 | EXPORT_SYMBOL_GPL(nfs41_sequence_done); | 812 | EXPORT_SYMBOL_GPL(nfs41_sequence_done); |
793 | 813 | ||
814 | static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) | ||
815 | { | ||
816 | if (res->sr_slot == NULL) | ||
817 | return 1; | ||
818 | if (res->sr_slot->table->session != NULL) | ||
819 | return nfs41_sequence_process(task, res); | ||
820 | return nfs40_sequence_done(task, res); | ||
821 | } | ||
822 | |||
823 | static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) | ||
824 | { | ||
825 | if (res->sr_slot != NULL) { | ||
826 | if (res->sr_slot->table->session != NULL) | ||
827 | nfs41_sequence_free_slot(res); | ||
828 | else | ||
829 | nfs40_sequence_free_slot(res); | ||
830 | } | ||
831 | } | ||
832 | |||
794 | int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | 833 | int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) |
795 | { | 834 | { |
796 | if (res->sr_slot == NULL) | 835 | if (res->sr_slot == NULL) |
@@ -920,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server, | |||
920 | args, res, task); | 959 | args, res, task); |
921 | } | 960 | } |
922 | 961 | ||
962 | static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) | ||
963 | { | ||
964 | return nfs40_sequence_done(task, res); | ||
965 | } | ||
966 | |||
967 | static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) | ||
968 | { | ||
969 | if (res->sr_slot != NULL) | ||
970 | nfs40_sequence_free_slot(res); | ||
971 | } | ||
972 | |||
923 | int nfs4_sequence_done(struct rpc_task *task, | 973 | int nfs4_sequence_done(struct rpc_task *task, |
924 | struct nfs4_sequence_res *res) | 974 | struct nfs4_sequence_res *res) |
925 | { | 975 | { |
@@ -1197,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref) | |||
1197 | struct super_block *sb = p->dentry->d_sb; | 1247 | struct super_block *sb = p->dentry->d_sb; |
1198 | 1248 | ||
1199 | nfs_free_seqid(p->o_arg.seqid); | 1249 | nfs_free_seqid(p->o_arg.seqid); |
1250 | nfs4_sequence_free_slot(&p->o_res.seq_res); | ||
1200 | if (p->state != NULL) | 1251 | if (p->state != NULL) |
1201 | nfs4_put_open_state(p->state); | 1252 | nfs4_put_open_state(p->state); |
1202 | nfs4_put_state_owner(p->owner); | 1253 | nfs4_put_state_owner(p->owner); |
@@ -1656,9 +1707,14 @@ err: | |||
1656 | static struct nfs4_state * | 1707 | static struct nfs4_state * |
1657 | nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | 1708 | nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) |
1658 | { | 1709 | { |
1710 | struct nfs4_state *ret; | ||
1711 | |||
1659 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) | 1712 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) |
1660 | return _nfs4_opendata_reclaim_to_nfs4_state(data); | 1713 | ret =_nfs4_opendata_reclaim_to_nfs4_state(data); |
1661 | return _nfs4_opendata_to_nfs4_state(data); | 1714 | else |
1715 | ret = _nfs4_opendata_to_nfs4_state(data); | ||
1716 | nfs4_sequence_free_slot(&data->o_res.seq_res); | ||
1717 | return ret; | ||
1662 | } | 1718 | } |
1663 | 1719 | ||
1664 | static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) | 1720 | static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) |
@@ -2056,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) | |||
2056 | 2112 | ||
2057 | data->rpc_status = task->tk_status; | 2113 | data->rpc_status = task->tk_status; |
2058 | 2114 | ||
2059 | if (!nfs4_sequence_done(task, &data->o_res.seq_res)) | 2115 | if (!nfs4_sequence_process(task, &data->o_res.seq_res)) |
2060 | return; | 2116 | return; |
2061 | 2117 | ||
2062 | if (task->tk_status == 0) { | 2118 | if (task->tk_status == 0) { |
@@ -4237,12 +4293,9 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str | |||
4237 | err = _nfs4_do_fsinfo(server, fhandle, fsinfo); | 4293 | err = _nfs4_do_fsinfo(server, fhandle, fsinfo); |
4238 | trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); | 4294 | trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); |
4239 | if (err == 0) { | 4295 | if (err == 0) { |
4240 | struct nfs_client *clp = server->nfs_client; | 4296 | nfs4_set_lease_period(server->nfs_client, |
4241 | 4297 | fsinfo->lease_time * HZ, | |
4242 | spin_lock(&clp->cl_lock); | 4298 | now); |
4243 | clp->cl_lease_time = fsinfo->lease_time * HZ; | ||
4244 | clp->cl_last_renewal = now; | ||
4245 | spin_unlock(&clp->cl_lock); | ||
4246 | break; | 4299 | break; |
4247 | } | 4300 | } |
4248 | err = nfs4_handle_exception(server, err, &exception); | 4301 | err = nfs4_handle_exception(server, err, &exception); |
@@ -7517,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, | |||
7517 | status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 7570 | status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
7518 | trace_nfs4_create_session(clp, status); | 7571 | trace_nfs4_create_session(clp, status); |
7519 | 7572 | ||
7573 | switch (status) { | ||
7574 | case -NFS4ERR_STALE_CLIENTID: | ||
7575 | case -NFS4ERR_DELAY: | ||
7576 | case -ETIMEDOUT: | ||
7577 | case -EACCES: | ||
7578 | case -EAGAIN: | ||
7579 | goto out; | ||
7580 | }; | ||
7581 | |||
7582 | clp->cl_seqid++; | ||
7520 | if (!status) { | 7583 | if (!status) { |
7521 | /* Verify the session's negotiated channel_attrs values */ | 7584 | /* Verify the session's negotiated channel_attrs values */ |
7522 | status = nfs4_verify_channel_attrs(&args, &res); | 7585 | status = nfs4_verify_channel_attrs(&args, &res); |
7523 | /* Increment the clientid slot sequence id */ | 7586 | /* Increment the clientid slot sequence id */ |
7524 | if (clp->cl_seqid == res.seqid) | ||
7525 | clp->cl_seqid++; | ||
7526 | if (status) | 7587 | if (status) |
7527 | goto out; | 7588 | goto out; |
7528 | nfs4_update_session(session, &res); | 7589 | nfs4_update_session(session, &res); |
@@ -7867,7 +7928,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7867 | struct nfs4_layoutget *lgp = calldata; | 7928 | struct nfs4_layoutget *lgp = calldata; |
7868 | 7929 | ||
7869 | dprintk("--> %s\n", __func__); | 7930 | dprintk("--> %s\n", __func__); |
7870 | nfs41_sequence_done(task, &lgp->res.seq_res); | 7931 | nfs41_sequence_process(task, &lgp->res.seq_res); |
7871 | dprintk("<-- %s\n", __func__); | 7932 | dprintk("<-- %s\n", __func__); |
7872 | } | 7933 | } |
7873 | 7934 | ||
@@ -8083,6 +8144,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) | |||
8083 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ | 8144 | /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ |
8084 | if (status == 0 && lgp->res.layoutp->len) | 8145 | if (status == 0 && lgp->res.layoutp->len) |
8085 | lseg = pnfs_layout_process(lgp); | 8146 | lseg = pnfs_layout_process(lgp); |
8147 | nfs4_sequence_free_slot(&lgp->res.seq_res); | ||
8086 | rpc_put_task(task); | 8148 | rpc_put_task(task); |
8087 | dprintk("<-- %s status=%d\n", __func__, status); | 8149 | dprintk("<-- %s status=%d\n", __func__, status); |
8088 | if (status) | 8150 | if (status) |
@@ -8109,7 +8171,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
8109 | 8171 | ||
8110 | dprintk("--> %s\n", __func__); | 8172 | dprintk("--> %s\n", __func__); |
8111 | 8173 | ||
8112 | if (!nfs41_sequence_done(task, &lrp->res.seq_res)) | 8174 | if (!nfs41_sequence_process(task, &lrp->res.seq_res)) |
8113 | return; | 8175 | return; |
8114 | 8176 | ||
8115 | server = NFS_SERVER(lrp->args.inode); | 8177 | server = NFS_SERVER(lrp->args.inode); |
@@ -8121,6 +8183,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
8121 | case -NFS4ERR_DELAY: | 8183 | case -NFS4ERR_DELAY: |
8122 | if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) | 8184 | if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) |
8123 | break; | 8185 | break; |
8186 | nfs4_sequence_free_slot(&lrp->res.seq_res); | ||
8124 | rpc_restart_call_prepare(task); | 8187 | rpc_restart_call_prepare(task); |
8125 | return; | 8188 | return; |
8126 | } | 8189 | } |
@@ -8135,12 +8198,16 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
8135 | 8198 | ||
8136 | dprintk("--> %s\n", __func__); | 8199 | dprintk("--> %s\n", __func__); |
8137 | spin_lock(&lo->plh_inode->i_lock); | 8200 | spin_lock(&lo->plh_inode->i_lock); |
8138 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range, | 8201 | if (lrp->res.lrs_present) { |
8139 | be32_to_cpu(lrp->args.stateid.seqid)); | 8202 | pnfs_mark_matching_lsegs_invalid(lo, &freeme, |
8140 | if (lrp->res.lrs_present && pnfs_layout_is_valid(lo)) | 8203 | &lrp->args.range, |
8204 | be32_to_cpu(lrp->args.stateid.seqid)); | ||
8141 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 8205 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
8206 | } else | ||
8207 | pnfs_mark_layout_stateid_invalid(lo, &freeme); | ||
8142 | pnfs_clear_layoutreturn_waitbit(lo); | 8208 | pnfs_clear_layoutreturn_waitbit(lo); |
8143 | spin_unlock(&lo->plh_inode->i_lock); | 8209 | spin_unlock(&lo->plh_inode->i_lock); |
8210 | nfs4_sequence_free_slot(&lrp->res.seq_res); | ||
8144 | pnfs_free_lseg_list(&freeme); | 8211 | pnfs_free_lseg_list(&freeme); |
8145 | pnfs_put_layout_hdr(lrp->args.layout); | 8212 | pnfs_put_layout_hdr(lrp->args.layout); |
8146 | nfs_iput_and_deactive(lrp->inode); | 8213 | nfs_iput_and_deactive(lrp->inode); |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index e1ba58c3d1ad..82e77198d17e 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -136,6 +136,26 @@ nfs4_kill_renewd(struct nfs_client *clp) | |||
136 | cancel_delayed_work_sync(&clp->cl_renewd); | 136 | cancel_delayed_work_sync(&clp->cl_renewd); |
137 | } | 137 | } |
138 | 138 | ||
139 | /** | ||
140 | * nfs4_set_lease_period - Sets the lease period on a nfs_client | ||
141 | * | ||
142 | * @clp: pointer to nfs_client | ||
143 | * @lease: new value for lease period | ||
144 | * @lastrenewed: time at which lease was last renewed | ||
145 | */ | ||
146 | void nfs4_set_lease_period(struct nfs_client *clp, | ||
147 | unsigned long lease, | ||
148 | unsigned long lastrenewed) | ||
149 | { | ||
150 | spin_lock(&clp->cl_lock); | ||
151 | clp->cl_lease_time = lease; | ||
152 | clp->cl_last_renewal = lastrenewed; | ||
153 | spin_unlock(&clp->cl_lock); | ||
154 | |||
155 | /* Cap maximum reconnect timeout at 1/2 lease period */ | ||
156 | rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1); | ||
157 | } | ||
158 | |||
139 | /* | 159 | /* |
140 | * Local variables: | 160 | * Local variables: |
141 | * c-basic-offset: 8 | 161 | * c-basic-offset: 8 |
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 332d06e64fa9..b62973045a3e 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c | |||
@@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue) | |||
28 | tbl->highest_used_slotid = NFS4_NO_SLOT; | 28 | tbl->highest_used_slotid = NFS4_NO_SLOT; |
29 | spin_lock_init(&tbl->slot_tbl_lock); | 29 | spin_lock_init(&tbl->slot_tbl_lock); |
30 | rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); | 30 | rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); |
31 | init_waitqueue_head(&tbl->slot_waitq); | ||
31 | init_completion(&tbl->complete); | 32 | init_completion(&tbl->complete); |
32 | } | 33 | } |
33 | 34 | ||
@@ -172,6 +173,58 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid) | |||
172 | return ERR_PTR(-E2BIG); | 173 | return ERR_PTR(-E2BIG); |
173 | } | 174 | } |
174 | 175 | ||
176 | static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, | ||
177 | u32 *seq_nr) | ||
178 | __must_hold(&tbl->slot_tbl_lock) | ||
179 | { | ||
180 | struct nfs4_slot *slot; | ||
181 | |||
182 | slot = nfs4_lookup_slot(tbl, slotid); | ||
183 | if (IS_ERR(slot)) | ||
184 | return PTR_ERR(slot); | ||
185 | *seq_nr = slot->seq_nr; | ||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use | ||
191 | * | ||
192 | * Given a slot table, slot id and sequence number, determine if the | ||
193 | * RPC call in question is still in flight. This function is mainly | ||
194 | * intended for use by the callback channel. | ||
195 | */ | ||
196 | static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, | ||
197 | u32 slotid, u32 seq_nr) | ||
198 | { | ||
199 | u32 cur_seq; | ||
200 | bool ret = false; | ||
201 | |||
202 | spin_lock(&tbl->slot_tbl_lock); | ||
203 | if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 && | ||
204 | cur_seq == seq_nr && test_bit(slotid, tbl->used_slots)) | ||
205 | ret = true; | ||
206 | spin_unlock(&tbl->slot_tbl_lock); | ||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete | ||
212 | * | ||
213 | * Given a slot table, slot id and sequence number, wait until the | ||
214 | * corresponding RPC call completes. This function is mainly | ||
215 | * intended for use by the callback channel. | ||
216 | */ | ||
217 | int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, | ||
218 | u32 slotid, u32 seq_nr, | ||
219 | unsigned long timeout) | ||
220 | { | ||
221 | if (wait_event_timeout(tbl->slot_waitq, | ||
222 | !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr), | ||
223 | timeout) == 0) | ||
224 | return -ETIMEDOUT; | ||
225 | return 0; | ||
226 | } | ||
227 | |||
175 | /* | 228 | /* |
176 | * nfs4_alloc_slot - efficiently look for a free slot | 229 | * nfs4_alloc_slot - efficiently look for a free slot |
177 | * | 230 | * |
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 5b51298d1d03..f703b755351b 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h | |||
@@ -21,7 +21,8 @@ struct nfs4_slot { | |||
21 | unsigned long generation; | 21 | unsigned long generation; |
22 | u32 slot_nr; | 22 | u32 slot_nr; |
23 | u32 seq_nr; | 23 | u32 seq_nr; |
24 | unsigned int interrupted : 1; | 24 | unsigned int interrupted : 1, |
25 | seq_done : 1; | ||
25 | }; | 26 | }; |
26 | 27 | ||
27 | /* Sessions */ | 28 | /* Sessions */ |
@@ -36,6 +37,7 @@ struct nfs4_slot_table { | |||
36 | unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ | 37 | unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ |
37 | spinlock_t slot_tbl_lock; | 38 | spinlock_t slot_tbl_lock; |
38 | struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ | 39 | struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ |
40 | wait_queue_head_t slot_waitq; /* Completion wait on slot */ | ||
39 | u32 max_slots; /* # slots in table */ | 41 | u32 max_slots; /* # slots in table */ |
40 | u32 max_slotid; /* Max allowed slotid value */ | 42 | u32 max_slotid; /* Max allowed slotid value */ |
41 | u32 highest_used_slotid; /* sent to server on each SEQ. | 43 | u32 highest_used_slotid; /* sent to server on each SEQ. |
@@ -78,6 +80,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, | |||
78 | extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); | 80 | extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); |
79 | extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); | 81 | extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); |
80 | extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); | 82 | extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); |
83 | extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, | ||
84 | u32 slotid, u32 seq_nr, | ||
85 | unsigned long timeout); | ||
81 | extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); | 86 | extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); |
82 | extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); | 87 | extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); |
83 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); | 88 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 834b875900d6..cada00aa5096 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -277,20 +277,17 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) | |||
277 | { | 277 | { |
278 | int status; | 278 | int status; |
279 | struct nfs_fsinfo fsinfo; | 279 | struct nfs_fsinfo fsinfo; |
280 | unsigned long now; | ||
280 | 281 | ||
281 | if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { | 282 | if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { |
282 | nfs4_schedule_state_renewal(clp); | 283 | nfs4_schedule_state_renewal(clp); |
283 | return 0; | 284 | return 0; |
284 | } | 285 | } |
285 | 286 | ||
287 | now = jiffies; | ||
286 | status = nfs4_proc_get_lease_time(clp, &fsinfo); | 288 | status = nfs4_proc_get_lease_time(clp, &fsinfo); |
287 | if (status == 0) { | 289 | if (status == 0) { |
288 | /* Update lease time and schedule renewal */ | 290 | nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); |
289 | spin_lock(&clp->cl_lock); | ||
290 | clp->cl_lease_time = fsinfo.lease_time * HZ; | ||
291 | clp->cl_last_renewal = jiffies; | ||
292 | spin_unlock(&clp->cl_lock); | ||
293 | |||
294 | nfs4_schedule_state_renewal(clp); | 291 | nfs4_schedule_state_renewal(clp); |
295 | } | 292 | } |
296 | 293 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 70806cae0d36..2c93a85eda51 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, | |||
365 | /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ | 365 | /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ |
366 | atomic_dec(&lo->plh_refcount); | 366 | atomic_dec(&lo->plh_refcount); |
367 | if (list_empty(&lo->plh_segs)) { | 367 | if (list_empty(&lo->plh_segs)) { |
368 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | 368 | if (atomic_read(&lo->plh_outstanding) == 0) |
369 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
369 | clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 370 | clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
370 | } | 371 | } |
371 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 372 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); |
@@ -768,17 +769,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
768 | pnfs_destroy_layouts_byclid(clp, false); | 769 | pnfs_destroy_layouts_byclid(clp, false); |
769 | } | 770 | } |
770 | 771 | ||
772 | static void | ||
773 | pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) | ||
774 | { | ||
775 | lo->plh_return_iomode = 0; | ||
776 | lo->plh_return_seq = 0; | ||
777 | clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | ||
778 | } | ||
779 | |||
771 | /* update lo->plh_stateid with new if is more recent */ | 780 | /* update lo->plh_stateid with new if is more recent */ |
772 | void | 781 | void |
773 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 782 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
774 | bool update_barrier) | 783 | bool update_barrier) |
775 | { | 784 | { |
776 | u32 oldseq, newseq, new_barrier = 0; | 785 | u32 oldseq, newseq, new_barrier = 0; |
777 | bool invalid = !pnfs_layout_is_valid(lo); | ||
778 | 786 | ||
779 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); | 787 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); |
780 | newseq = be32_to_cpu(new->seqid); | 788 | newseq = be32_to_cpu(new->seqid); |
781 | if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) { | 789 | |
790 | if (!pnfs_layout_is_valid(lo)) { | ||
791 | nfs4_stateid_copy(&lo->plh_stateid, new); | ||
792 | lo->plh_barrier = newseq; | ||
793 | pnfs_clear_layoutreturn_info(lo); | ||
794 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
795 | return; | ||
796 | } | ||
797 | if (pnfs_seqid_is_newer(newseq, oldseq)) { | ||
782 | nfs4_stateid_copy(&lo->plh_stateid, new); | 798 | nfs4_stateid_copy(&lo->plh_stateid, new); |
783 | /* | 799 | /* |
784 | * Because of wraparound, we want to keep the barrier | 800 | * Because of wraparound, we want to keep the barrier |
@@ -790,7 +806,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | |||
790 | new_barrier = be32_to_cpu(new->seqid); | 806 | new_barrier = be32_to_cpu(new->seqid); |
791 | else if (new_barrier == 0) | 807 | else if (new_barrier == 0) |
792 | return; | 808 | return; |
793 | if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) | 809 | if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) |
794 | lo->plh_barrier = new_barrier; | 810 | lo->plh_barrier = new_barrier; |
795 | } | 811 | } |
796 | 812 | ||
@@ -886,19 +902,14 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | |||
886 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | 902 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); |
887 | } | 903 | } |
888 | 904 | ||
889 | static void | ||
890 | pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) | ||
891 | { | ||
892 | lo->plh_return_iomode = 0; | ||
893 | lo->plh_return_seq = 0; | ||
894 | clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | ||
895 | } | ||
896 | |||
897 | static bool | 905 | static bool |
898 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, | 906 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, |
899 | nfs4_stateid *stateid, | 907 | nfs4_stateid *stateid, |
900 | enum pnfs_iomode *iomode) | 908 | enum pnfs_iomode *iomode) |
901 | { | 909 | { |
910 | /* Serialise LAYOUTGET/LAYOUTRETURN */ | ||
911 | if (atomic_read(&lo->plh_outstanding) != 0) | ||
912 | return false; | ||
902 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | 913 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) |
903 | return false; | 914 | return false; |
904 | pnfs_get_layout_hdr(lo); | 915 | pnfs_get_layout_hdr(lo); |
@@ -1555,6 +1566,7 @@ pnfs_update_layout(struct inode *ino, | |||
1555 | } | 1566 | } |
1556 | 1567 | ||
1557 | lookup_again: | 1568 | lookup_again: |
1569 | nfs4_client_recover_expired_lease(clp); | ||
1558 | first = false; | 1570 | first = false; |
1559 | spin_lock(&ino->i_lock); | 1571 | spin_lock(&ino->i_lock); |
1560 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1572 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
@@ -1797,16 +1809,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1797 | */ | 1809 | */ |
1798 | pnfs_mark_layout_stateid_invalid(lo, &free_me); | 1810 | pnfs_mark_layout_stateid_invalid(lo, &free_me); |
1799 | 1811 | ||
1800 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); | 1812 | pnfs_set_layout_stateid(lo, &res->stateid, true); |
1801 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); | ||
1802 | } | 1813 | } |
1803 | 1814 | ||
1804 | pnfs_get_lseg(lseg); | 1815 | pnfs_get_lseg(lseg); |
1805 | pnfs_layout_insert_lseg(lo, lseg, &free_me); | 1816 | pnfs_layout_insert_lseg(lo, lseg, &free_me); |
1806 | if (!pnfs_layout_is_valid(lo)) { | ||
1807 | pnfs_clear_layoutreturn_info(lo); | ||
1808 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
1809 | } | ||
1810 | 1817 | ||
1811 | 1818 | ||
1812 | if (res->return_on_close) | 1819 | if (res->return_on_close) |
@@ -2510,7 +2517,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) | |||
2510 | 2517 | ||
2511 | data->args.fh = NFS_FH(inode); | 2518 | data->args.fh = NFS_FH(inode); |
2512 | data->args.inode = inode; | 2519 | data->args.inode = inode; |
2513 | nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); | ||
2514 | status = ld->prepare_layoutstats(&data->args); | 2520 | status = ld->prepare_layoutstats(&data->args); |
2515 | if (status) | 2521 | if (status) |
2516 | goto out_free; | 2522 | goto out_free; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 18d446e1a82b..d39601381adf 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) | |||
923 | 923 | ||
924 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 924 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
925 | if (data) { | 925 | if (data) { |
926 | data->timeo = NFS_UNSPEC_TIMEO; | ||
927 | data->retrans = NFS_UNSPEC_RETRANS; | ||
926 | data->acregmin = NFS_DEF_ACREGMIN; | 928 | data->acregmin = NFS_DEF_ACREGMIN; |
927 | data->acregmax = NFS_DEF_ACREGMAX; | 929 | data->acregmax = NFS_DEF_ACREGMAX; |
928 | data->acdirmin = NFS_DEF_ACDIRMIN; | 930 | data->acdirmin = NFS_DEF_ACDIRMIN; |
@@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) | |||
1189 | return rc; | 1191 | return rc; |
1190 | } | 1192 | } |
1191 | 1193 | ||
1194 | static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, | ||
1195 | unsigned long l_bound, unsigned long u_bound) | ||
1196 | { | ||
1197 | int ret; | ||
1198 | |||
1199 | ret = nfs_get_option_ul(args, option); | ||
1200 | if (ret != 0) | ||
1201 | return ret; | ||
1202 | if (*option < l_bound || *option > u_bound) | ||
1203 | return -ERANGE; | ||
1204 | return 0; | ||
1205 | } | ||
1206 | |||
1192 | /* | 1207 | /* |
1193 | * Error-check and convert a string of mount options from user space into | 1208 | * Error-check and convert a string of mount options from user space into |
1194 | * a data structure. The whole mount string is processed; bad options are | 1209 | * a data structure. The whole mount string is processed; bad options are |
@@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1352 | mnt->bsize = option; | 1367 | mnt->bsize = option; |
1353 | break; | 1368 | break; |
1354 | case Opt_timeo: | 1369 | case Opt_timeo: |
1355 | if (nfs_get_option_ul(args, &option) || option == 0) | 1370 | if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) |
1356 | goto out_invalid_value; | 1371 | goto out_invalid_value; |
1357 | mnt->timeo = option; | 1372 | mnt->timeo = option; |
1358 | break; | 1373 | break; |
1359 | case Opt_retrans: | 1374 | case Opt_retrans: |
1360 | if (nfs_get_option_ul(args, &option) || option == 0) | 1375 | if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) |
1361 | goto out_invalid_value; | 1376 | goto out_invalid_value; |
1362 | mnt->retrans = option; | 1377 | mnt->retrans = option; |
1363 | break; | 1378 | break; |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8410ca275db1..a204d7e109d4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -4903,6 +4903,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4903 | return nfs_ok; | 4903 | return nfs_ok; |
4904 | } | 4904 | } |
4905 | 4905 | ||
4906 | static __be32 | ||
4907 | nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) | ||
4908 | { | ||
4909 | struct nfs4_ol_stateid *stp = openlockstateid(s); | ||
4910 | __be32 ret; | ||
4911 | |||
4912 | mutex_lock(&stp->st_mutex); | ||
4913 | |||
4914 | ret = check_stateid_generation(stateid, &s->sc_stateid, 1); | ||
4915 | if (ret) | ||
4916 | goto out; | ||
4917 | |||
4918 | ret = nfserr_locks_held; | ||
4919 | if (check_for_locks(stp->st_stid.sc_file, | ||
4920 | lockowner(stp->st_stateowner))) | ||
4921 | goto out; | ||
4922 | |||
4923 | release_lock_stateid(stp); | ||
4924 | ret = nfs_ok; | ||
4925 | |||
4926 | out: | ||
4927 | mutex_unlock(&stp->st_mutex); | ||
4928 | nfs4_put_stid(s); | ||
4929 | return ret; | ||
4930 | } | ||
4931 | |||
4906 | __be32 | 4932 | __be32 |
4907 | nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 4933 | nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
4908 | struct nfsd4_free_stateid *free_stateid) | 4934 | struct nfsd4_free_stateid *free_stateid) |
@@ -4910,7 +4936,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4910 | stateid_t *stateid = &free_stateid->fr_stateid; | 4936 | stateid_t *stateid = &free_stateid->fr_stateid; |
4911 | struct nfs4_stid *s; | 4937 | struct nfs4_stid *s; |
4912 | struct nfs4_delegation *dp; | 4938 | struct nfs4_delegation *dp; |
4913 | struct nfs4_ol_stateid *stp; | ||
4914 | struct nfs4_client *cl = cstate->session->se_client; | 4939 | struct nfs4_client *cl = cstate->session->se_client; |
4915 | __be32 ret = nfserr_bad_stateid; | 4940 | __be32 ret = nfserr_bad_stateid; |
4916 | 4941 | ||
@@ -4929,18 +4954,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4929 | ret = nfserr_locks_held; | 4954 | ret = nfserr_locks_held; |
4930 | break; | 4955 | break; |
4931 | case NFS4_LOCK_STID: | 4956 | case NFS4_LOCK_STID: |
4932 | ret = check_stateid_generation(stateid, &s->sc_stateid, 1); | 4957 | atomic_inc(&s->sc_count); |
4933 | if (ret) | ||
4934 | break; | ||
4935 | stp = openlockstateid(s); | ||
4936 | ret = nfserr_locks_held; | ||
4937 | if (check_for_locks(stp->st_stid.sc_file, | ||
4938 | lockowner(stp->st_stateowner))) | ||
4939 | break; | ||
4940 | WARN_ON(!unhash_lock_stateid(stp)); | ||
4941 | spin_unlock(&cl->cl_lock); | 4958 | spin_unlock(&cl->cl_lock); |
4942 | nfs4_put_stid(s); | 4959 | ret = nfsd4_free_lock_stateid(stateid, s); |
4943 | ret = nfs_ok; | ||
4944 | goto out; | 4960 | goto out; |
4945 | case NFS4_REVOKED_DELEG_STID: | 4961 | case NFS4_REVOKED_DELEG_STID: |
4946 | dp = delegstateid(s); | 4962 | dp = delegstateid(s); |
@@ -5507,7 +5523,7 @@ static __be32 | |||
5507 | lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, | 5523 | lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, |
5508 | struct nfs4_ol_stateid *ost, | 5524 | struct nfs4_ol_stateid *ost, |
5509 | struct nfsd4_lock *lock, | 5525 | struct nfsd4_lock *lock, |
5510 | struct nfs4_ol_stateid **lst, bool *new) | 5526 | struct nfs4_ol_stateid **plst, bool *new) |
5511 | { | 5527 | { |
5512 | __be32 status; | 5528 | __be32 status; |
5513 | struct nfs4_file *fi = ost->st_stid.sc_file; | 5529 | struct nfs4_file *fi = ost->st_stid.sc_file; |
@@ -5515,7 +5531,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, | |||
5515 | struct nfs4_client *cl = oo->oo_owner.so_client; | 5531 | struct nfs4_client *cl = oo->oo_owner.so_client; |
5516 | struct inode *inode = d_inode(cstate->current_fh.fh_dentry); | 5532 | struct inode *inode = d_inode(cstate->current_fh.fh_dentry); |
5517 | struct nfs4_lockowner *lo; | 5533 | struct nfs4_lockowner *lo; |
5534 | struct nfs4_ol_stateid *lst; | ||
5518 | unsigned int strhashval; | 5535 | unsigned int strhashval; |
5536 | bool hashed; | ||
5519 | 5537 | ||
5520 | lo = find_lockowner_str(cl, &lock->lk_new_owner); | 5538 | lo = find_lockowner_str(cl, &lock->lk_new_owner); |
5521 | if (!lo) { | 5539 | if (!lo) { |
@@ -5531,12 +5549,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, | |||
5531 | goto out; | 5549 | goto out; |
5532 | } | 5550 | } |
5533 | 5551 | ||
5534 | *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); | 5552 | retry: |
5535 | if (*lst == NULL) { | 5553 | lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); |
5554 | if (lst == NULL) { | ||
5536 | status = nfserr_jukebox; | 5555 | status = nfserr_jukebox; |
5537 | goto out; | 5556 | goto out; |
5538 | } | 5557 | } |
5558 | |||
5559 | mutex_lock(&lst->st_mutex); | ||
5560 | |||
5561 | /* See if it's still hashed to avoid race with FREE_STATEID */ | ||
5562 | spin_lock(&cl->cl_lock); | ||
5563 | hashed = !list_empty(&lst->st_perfile); | ||
5564 | spin_unlock(&cl->cl_lock); | ||
5565 | |||
5566 | if (!hashed) { | ||
5567 | mutex_unlock(&lst->st_mutex); | ||
5568 | nfs4_put_stid(&lst->st_stid); | ||
5569 | goto retry; | ||
5570 | } | ||
5539 | status = nfs_ok; | 5571 | status = nfs_ok; |
5572 | *plst = lst; | ||
5540 | out: | 5573 | out: |
5541 | nfs4_put_stateowner(&lo->lo_owner); | 5574 | nfs4_put_stateowner(&lo->lo_owner); |
5542 | return status; | 5575 | return status; |
@@ -5603,8 +5636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5603 | goto out; | 5636 | goto out; |
5604 | status = lookup_or_create_lock_state(cstate, open_stp, lock, | 5637 | status = lookup_or_create_lock_state(cstate, open_stp, lock, |
5605 | &lock_stp, &new); | 5638 | &lock_stp, &new); |
5606 | if (status == nfs_ok) | ||
5607 | mutex_lock(&lock_stp->st_mutex); | ||
5608 | } else { | 5639 | } else { |
5609 | status = nfs4_preprocess_seqid_op(cstate, | 5640 | status = nfs4_preprocess_seqid_op(cstate, |
5610 | lock->lk_old_lock_seqid, | 5641 | lock->lk_old_lock_seqid, |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ba944123167b..ff476e654b8f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1252,10 +1252,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1252 | if (IS_ERR(dchild)) | 1252 | if (IS_ERR(dchild)) |
1253 | return nfserrno(host_err); | 1253 | return nfserrno(host_err); |
1254 | err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); | 1254 | err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); |
1255 | if (err) { | 1255 | /* |
1256 | dput(dchild); | 1256 | * We unconditionally drop our ref to dchild as fh_compose will have |
1257 | * already grabbed its own ref for it. | ||
1258 | */ | ||
1259 | dput(dchild); | ||
1260 | if (err) | ||
1257 | return err; | 1261 | return err; |
1258 | } | ||
1259 | return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, | 1262 | return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, |
1260 | rdev, resfhp); | 1263 | rdev, resfhp); |
1261 | } | 1264 | } |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d2f97ecca6a5..e0e5f7c3c99f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group, | |||
67 | 67 | ||
68 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 68 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
69 | 69 | ||
70 | wait_event(group->fanotify_data.access_waitq, event->response || | 70 | wait_event(group->fanotify_data.access_waitq, event->response); |
71 | atomic_read(&group->fanotify_data.bypass_perm)); | ||
72 | |||
73 | if (!event->response) { /* bypass_perm set */ | ||
74 | /* | ||
75 | * Event was canceled because group is being destroyed. Remove | ||
76 | * it from group's event list because we are responsible for | ||
77 | * freeing the permission event. | ||
78 | */ | ||
79 | fsnotify_remove_event(group, &event->fae.fse); | ||
80 | return 0; | ||
81 | } | ||
82 | 71 | ||
83 | /* userspace responded, convert to something usable */ | 72 | /* userspace responded, convert to something usable */ |
84 | switch (event->response) { | 73 | switch (event->response) { |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 8e8e6bcd1d43..a64313868d3a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
358 | 358 | ||
359 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 359 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
360 | struct fanotify_perm_event_info *event, *next; | 360 | struct fanotify_perm_event_info *event, *next; |
361 | struct fsnotify_event *fsn_event; | ||
361 | 362 | ||
362 | /* | 363 | /* |
363 | * There may be still new events arriving in the notification queue | 364 | * Stop new events from arriving in the notification queue. Since |
364 | * but since userspace cannot use fanotify fd anymore, no event can | 365 | * userspace cannot use fanotify fd anymore, no event can enter or |
365 | * enter or leave access_list by now. | 366 | * leave access_list by now either. |
366 | */ | 367 | */ |
367 | spin_lock(&group->fanotify_data.access_lock); | 368 | fsnotify_group_stop_queueing(group); |
368 | |||
369 | atomic_inc(&group->fanotify_data.bypass_perm); | ||
370 | 369 | ||
370 | /* | ||
371 | * Process all permission events on access_list and notification queue | ||
372 | * and simulate reply from userspace. | ||
373 | */ | ||
374 | spin_lock(&group->fanotify_data.access_lock); | ||
371 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, | 375 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, |
372 | fae.fse.list) { | 376 | fae.fse.list) { |
373 | pr_debug("%s: found group=%p event=%p\n", __func__, group, | 377 | pr_debug("%s: found group=%p event=%p\n", __func__, group, |
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
379 | spin_unlock(&group->fanotify_data.access_lock); | 383 | spin_unlock(&group->fanotify_data.access_lock); |
380 | 384 | ||
381 | /* | 385 | /* |
382 | * Since bypass_perm is set, newly queued events will not wait for | 386 | * Destroy all non-permission events. For permission events just |
383 | * access response. Wake up the already sleeping ones now. | 387 | * dequeue them and set the response. They will be freed once the |
384 | * synchronize_srcu() in fsnotify_destroy_group() will wait for all | 388 | * response is consumed and fanotify_get_response() returns. |
385 | * processes sleeping in fanotify_handle_event() waiting for access | ||
386 | * response and thus also for all permission events to be freed. | ||
387 | */ | 389 | */ |
390 | mutex_lock(&group->notification_mutex); | ||
391 | while (!fsnotify_notify_queue_is_empty(group)) { | ||
392 | fsn_event = fsnotify_remove_first_event(group); | ||
393 | if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) | ||
394 | fsnotify_destroy_event(group, fsn_event); | ||
395 | else | ||
396 | FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; | ||
397 | } | ||
398 | mutex_unlock(&group->notification_mutex); | ||
399 | |||
400 | /* Response for all permission events is set, wake up waiters */ | ||
388 | wake_up(&group->fanotify_data.access_waitq); | 401 | wake_up(&group->fanotify_data.access_waitq); |
389 | #endif | 402 | #endif |
390 | 403 | ||
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
755 | spin_lock_init(&group->fanotify_data.access_lock); | 768 | spin_lock_init(&group->fanotify_data.access_lock); |
756 | init_waitqueue_head(&group->fanotify_data.access_waitq); | 769 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
757 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | 770 | INIT_LIST_HEAD(&group->fanotify_data.access_list); |
758 | atomic_set(&group->fanotify_data.bypass_perm, 0); | ||
759 | #endif | 771 | #endif |
760 | switch (flags & FAN_ALL_CLASS_BITS) { | 772 | switch (flags & FAN_ALL_CLASS_BITS) { |
761 | case FAN_CLASS_NOTIF: | 773 | case FAN_CLASS_NOTIF: |
diff --git a/fs/notify/group.c b/fs/notify/group.c index 3e2dd85be5dd..b47f7cfdcaa4 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) | |||
40 | } | 40 | } |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Stop queueing new events for this group. Once this function returns | ||
44 | * fsnotify_add_event() will not add any new events to the group's queue. | ||
45 | */ | ||
46 | void fsnotify_group_stop_queueing(struct fsnotify_group *group) | ||
47 | { | ||
48 | mutex_lock(&group->notification_mutex); | ||
49 | group->shutdown = true; | ||
50 | mutex_unlock(&group->notification_mutex); | ||
51 | } | ||
52 | |||
53 | /* | ||
43 | * Trying to get rid of a group. Remove all marks, flush all events and release | 54 | * Trying to get rid of a group. Remove all marks, flush all events and release |
44 | * the group reference. | 55 | * the group reference. |
45 | * Note that another thread calling fsnotify_clear_marks_by_group() may still | 56 | * Note that another thread calling fsnotify_clear_marks_by_group() may still |
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) | |||
47 | */ | 58 | */ |
48 | void fsnotify_destroy_group(struct fsnotify_group *group) | 59 | void fsnotify_destroy_group(struct fsnotify_group *group) |
49 | { | 60 | { |
61 | /* | ||
62 | * Stop queueing new events. The code below is careful enough to not | ||
63 | * require this but fanotify needs to stop queuing events even before | ||
64 | * fsnotify_destroy_group() is called and this makes the other callers | ||
65 | * of fsnotify_destroy_group() to see the same behavior. | ||
66 | */ | ||
67 | fsnotify_group_stop_queueing(group); | ||
68 | |||
50 | /* clear all inode marks for this group, attach them to destroy_list */ | 69 | /* clear all inode marks for this group, attach them to destroy_list */ |
51 | fsnotify_detach_group_marks(group); | 70 | fsnotify_detach_group_marks(group); |
52 | 71 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index a95d8e037aeb..e455e83ceeeb 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, | |||
82 | * Add an event to the group notification queue. The group can later pull this | 82 | * Add an event to the group notification queue. The group can later pull this |
83 | * event off the queue to deal with. The function returns 0 if the event was | 83 | * event off the queue to deal with. The function returns 0 if the event was |
84 | * added to the queue, 1 if the event was merged with some other queued event, | 84 | * added to the queue, 1 if the event was merged with some other queued event, |
85 | * 2 if the queue of events has overflown. | 85 | * 2 if the event was not queued - either the queue of events has overflown |
86 | * or the group is shutting down. | ||
86 | */ | 87 | */ |
87 | int fsnotify_add_event(struct fsnotify_group *group, | 88 | int fsnotify_add_event(struct fsnotify_group *group, |
88 | struct fsnotify_event *event, | 89 | struct fsnotify_event *event, |
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group, | |||
96 | 97 | ||
97 | mutex_lock(&group->notification_mutex); | 98 | mutex_lock(&group->notification_mutex); |
98 | 99 | ||
100 | if (group->shutdown) { | ||
101 | mutex_unlock(&group->notification_mutex); | ||
102 | return 2; | ||
103 | } | ||
104 | |||
99 | if (group->q_len >= group->max_events) { | 105 | if (group->q_len >= group->max_events) { |
100 | ret = 2; | 106 | ret = 2; |
101 | /* Queue overflow event only if it isn't already queued */ | 107 | /* Queue overflow event only if it isn't already queued */ |
@@ -126,21 +132,6 @@ queue: | |||
126 | } | 132 | } |
127 | 133 | ||
128 | /* | 134 | /* |
129 | * Remove @event from group's notification queue. It is the responsibility of | ||
130 | * the caller to destroy the event. | ||
131 | */ | ||
132 | void fsnotify_remove_event(struct fsnotify_group *group, | ||
133 | struct fsnotify_event *event) | ||
134 | { | ||
135 | mutex_lock(&group->notification_mutex); | ||
136 | if (!list_empty(&event->list)) { | ||
137 | list_del_init(&event->list); | ||
138 | group->q_len--; | ||
139 | } | ||
140 | mutex_unlock(&group->notification_mutex); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Remove and return the first event from the notification list. It is the | 135 | * Remove and return the first event from the notification list. It is the |
145 | * responsibility of the caller to destroy the obtained event | 136 | * responsibility of the caller to destroy the obtained event |
146 | */ | 137 | */ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7dabbc31060e..f165f867f332 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -5922,7 +5922,6 @@ bail: | |||
5922 | } | 5922 | } |
5923 | 5923 | ||
5924 | static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, | 5924 | static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, |
5925 | handle_t *handle, | ||
5926 | struct inode *data_alloc_inode, | 5925 | struct inode *data_alloc_inode, |
5927 | struct buffer_head *data_alloc_bh) | 5926 | struct buffer_head *data_alloc_bh) |
5928 | { | 5927 | { |
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, | |||
5935 | struct ocfs2_truncate_log *tl; | 5934 | struct ocfs2_truncate_log *tl; |
5936 | struct inode *tl_inode = osb->osb_tl_inode; | 5935 | struct inode *tl_inode = osb->osb_tl_inode; |
5937 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 5936 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
5937 | handle_t *handle; | ||
5938 | 5938 | ||
5939 | di = (struct ocfs2_dinode *) tl_bh->b_data; | 5939 | di = (struct ocfs2_dinode *) tl_bh->b_data; |
5940 | tl = &di->id2.i_dealloc; | 5940 | tl = &di->id2.i_dealloc; |
5941 | i = le16_to_cpu(tl->tl_used) - 1; | 5941 | i = le16_to_cpu(tl->tl_used) - 1; |
5942 | while (i >= 0) { | 5942 | while (i >= 0) { |
5943 | handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | ||
5944 | if (IS_ERR(handle)) { | ||
5945 | status = PTR_ERR(handle); | ||
5946 | mlog_errno(status); | ||
5947 | goto bail; | ||
5948 | } | ||
5949 | |||
5943 | /* Caller has given us at least enough credits to | 5950 | /* Caller has given us at least enough credits to |
5944 | * update the truncate log dinode */ | 5951 | * update the truncate log dinode */ |
5945 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, | 5952 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, |
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, | |||
5974 | } | 5981 | } |
5975 | } | 5982 | } |
5976 | 5983 | ||
5977 | status = ocfs2_extend_trans(handle, | 5984 | ocfs2_commit_trans(osb, handle); |
5978 | OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | ||
5979 | if (status < 0) { | ||
5980 | mlog_errno(status); | ||
5981 | goto bail; | ||
5982 | } | ||
5983 | i--; | 5985 | i--; |
5984 | } | 5986 | } |
5985 | 5987 | ||
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
5994 | { | 5996 | { |
5995 | int status; | 5997 | int status; |
5996 | unsigned int num_to_flush; | 5998 | unsigned int num_to_flush; |
5997 | handle_t *handle; | ||
5998 | struct inode *tl_inode = osb->osb_tl_inode; | 5999 | struct inode *tl_inode = osb->osb_tl_inode; |
5999 | struct inode *data_alloc_inode = NULL; | 6000 | struct inode *data_alloc_inode = NULL; |
6000 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 6001 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
6038 | goto out_mutex; | 6039 | goto out_mutex; |
6039 | } | 6040 | } |
6040 | 6041 | ||
6041 | handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | 6042 | status = ocfs2_replay_truncate_records(osb, data_alloc_inode, |
6042 | if (IS_ERR(handle)) { | ||
6043 | status = PTR_ERR(handle); | ||
6044 | mlog_errno(status); | ||
6045 | goto out_unlock; | ||
6046 | } | ||
6047 | |||
6048 | status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, | ||
6049 | data_alloc_bh); | 6043 | data_alloc_bh); |
6050 | if (status < 0) | 6044 | if (status < 0) |
6051 | mlog_errno(status); | 6045 | mlog_errno(status); |
6052 | 6046 | ||
6053 | ocfs2_commit_trans(osb, handle); | ||
6054 | |||
6055 | out_unlock: | ||
6056 | brelse(data_alloc_bh); | 6047 | brelse(data_alloc_bh); |
6057 | ocfs2_inode_unlock(data_alloc_inode, 1); | 6048 | ocfs2_inode_unlock(data_alloc_inode, 1); |
6058 | 6049 | ||
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, | |||
6413 | goto out_mutex; | 6404 | goto out_mutex; |
6414 | } | 6405 | } |
6415 | 6406 | ||
6416 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
6417 | if (IS_ERR(handle)) { | ||
6418 | ret = PTR_ERR(handle); | ||
6419 | mlog_errno(ret); | ||
6420 | goto out_unlock; | ||
6421 | } | ||
6422 | |||
6423 | while (head) { | 6407 | while (head) { |
6424 | if (head->free_bg) | 6408 | if (head->free_bg) |
6425 | bg_blkno = head->free_bg; | 6409 | bg_blkno = head->free_bg; |
6426 | else | 6410 | else |
6427 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, | 6411 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, |
6428 | head->free_bit); | 6412 | head->free_bit); |
6413 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
6414 | if (IS_ERR(handle)) { | ||
6415 | ret = PTR_ERR(handle); | ||
6416 | mlog_errno(ret); | ||
6417 | goto out_unlock; | ||
6418 | } | ||
6419 | |||
6429 | trace_ocfs2_free_cached_blocks( | 6420 | trace_ocfs2_free_cached_blocks( |
6430 | (unsigned long long)head->free_blk, head->free_bit); | 6421 | (unsigned long long)head->free_blk, head->free_bit); |
6431 | 6422 | ||
6432 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, | 6423 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, |
6433 | head->free_bit, bg_blkno, 1); | 6424 | head->free_bit, bg_blkno, 1); |
6434 | if (ret) { | 6425 | if (ret) |
6435 | mlog_errno(ret); | 6426 | mlog_errno(ret); |
6436 | goto out_journal; | ||
6437 | } | ||
6438 | 6427 | ||
6439 | ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); | 6428 | ocfs2_commit_trans(osb, handle); |
6440 | if (ret) { | ||
6441 | mlog_errno(ret); | ||
6442 | goto out_journal; | ||
6443 | } | ||
6444 | 6429 | ||
6445 | tmp = head; | 6430 | tmp = head; |
6446 | head = head->free_next; | 6431 | head = head->free_next; |
6447 | kfree(tmp); | 6432 | kfree(tmp); |
6448 | } | 6433 | } |
6449 | 6434 | ||
6450 | out_journal: | ||
6451 | ocfs2_commit_trans(osb, handle); | ||
6452 | |||
6453 | out_unlock: | 6435 | out_unlock: |
6454 | ocfs2_inode_unlock(inode, 1); | 6436 | ocfs2_inode_unlock(inode, 1); |
6455 | brelse(di_bh); | 6437 | brelse(di_bh); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 94b18369b1cc..b95e7df5b76a 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -44,9 +44,6 @@ | |||
44 | * version here in tcp_internal.h should not need to be bumped for | 44 | * version here in tcp_internal.h should not need to be bumped for |
45 | * filesystem locking changes. | 45 | * filesystem locking changes. |
46 | * | 46 | * |
47 | * New in version 12 | ||
48 | * - Negotiate hb timeout when storage is down. | ||
49 | * | ||
50 | * New in version 11 | 47 | * New in version 11 |
51 | * - Negotiation of filesystem locking in the dlm join. | 48 | * - Negotiation of filesystem locking in the dlm join. |
52 | * | 49 | * |
@@ -78,7 +75,7 @@ | |||
78 | * - full 64 bit i_size in the metadata lock lvbs | 75 | * - full 64 bit i_size in the metadata lock lvbs |
79 | * - introduction of "rw" lock and pushing meta/data locking down | 76 | * - introduction of "rw" lock and pushing meta/data locking down |
80 | */ | 77 | */ |
81 | #define O2NET_PROTOCOL_VERSION 12ULL | 78 | #define O2NET_PROTOCOL_VERSION 11ULL |
82 | struct o2net_handshake { | 79 | struct o2net_handshake { |
83 | __be64 protocol_version; | 80 | __be64 protocol_version; |
84 | __be64 connector_id; | 81 | __be64 connector_id; |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index cdeafb4e7ed6..0bb128659d4b 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, | |||
268 | struct dlm_lock *lock, int flags, int type) | 268 | struct dlm_lock *lock, int flags, int type) |
269 | { | 269 | { |
270 | enum dlm_status status; | 270 | enum dlm_status status; |
271 | u8 old_owner = res->owner; | ||
272 | 271 | ||
273 | mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, | 272 | mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, |
274 | lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); | 273 | lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); |
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, | |||
335 | 334 | ||
336 | spin_lock(&res->spinlock); | 335 | spin_lock(&res->spinlock); |
337 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 336 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
338 | lock->convert_pending = 0; | ||
339 | /* if it failed, move it back to granted queue. | 337 | /* if it failed, move it back to granted queue. |
340 | * if master returns DLM_NORMAL and then down before sending ast, | 338 | * if master returns DLM_NORMAL and then down before sending ast, |
341 | * it may have already been moved to granted queue, reset to | 339 | * it may have already been moved to granted queue, reset to |
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, | |||
344 | if (status != DLM_NOTQUEUED) | 342 | if (status != DLM_NOTQUEUED) |
345 | dlm_error(status); | 343 | dlm_error(status); |
346 | dlm_revert_pending_convert(res, lock); | 344 | dlm_revert_pending_convert(res, lock); |
347 | } else if ((res->state & DLM_LOCK_RES_RECOVERING) || | 345 | } else if (!lock->convert_pending) { |
348 | (old_owner != res->owner)) { | 346 | mlog(0, "%s: res %.*s, owner died and lock has been moved back " |
349 | mlog(0, "res %.*s is in recovering or has been recovered.\n", | 347 | "to granted list, retry convert.\n", |
350 | res->lockname.len, res->lockname.name); | 348 | dlm->name, res->lockname.len, res->lockname.name); |
351 | status = DLM_RECOVERING; | 349 | status = DLM_RECOVERING; |
352 | } | 350 | } |
351 | |||
352 | lock->convert_pending = 0; | ||
353 | bail: | 353 | bail: |
354 | spin_unlock(&res->spinlock); | 354 | spin_unlock(&res->spinlock); |
355 | 355 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4e7b0dc22450..0b055bfb8e86 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, | |||
1506 | u64 start, u64 len) | 1506 | u64 start, u64 len) |
1507 | { | 1507 | { |
1508 | int ret = 0; | 1508 | int ret = 0; |
1509 | u64 tmpend, end = start + len; | 1509 | u64 tmpend = 0; |
1510 | u64 end = start + len; | ||
1510 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1511 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1511 | unsigned int csize = osb->s_clustersize; | 1512 | unsigned int csize = osb->s_clustersize; |
1512 | handle_t *handle; | 1513 | handle_t *handle; |
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, | |||
1538 | } | 1539 | } |
1539 | 1540 | ||
1540 | /* | 1541 | /* |
1541 | * We want to get the byte offset of the end of the 1st cluster. | 1542 | * If start is on a cluster boundary and end is somewhere in another |
1543 | * cluster, we have not COWed the cluster starting at start, unless | ||
1544 | * end is also within the same cluster. So, in this case, we skip this | ||
1545 | * first call to ocfs2_zero_range_for_truncate() and move on | ||
1546 | * to the next one. | ||
1542 | */ | 1547 | */ |
1543 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | 1548 | if ((start & (csize - 1)) != 0) { |
1544 | if (tmpend > end) | 1549 | /* |
1545 | tmpend = end; | 1550 | * We want to get the byte offset of the end of the 1st |
1551 | * cluster. | ||
1552 | */ | ||
1553 | tmpend = (u64)osb->s_clustersize + | ||
1554 | (start & ~(osb->s_clustersize - 1)); | ||
1555 | if (tmpend > end) | ||
1556 | tmpend = end; | ||
1546 | 1557 | ||
1547 | trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, | 1558 | trace_ocfs2_zero_partial_clusters_range1( |
1548 | (unsigned long long)tmpend); | 1559 | (unsigned long long)start, |
1560 | (unsigned long long)tmpend); | ||
1549 | 1561 | ||
1550 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | 1562 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, |
1551 | if (ret) | 1563 | tmpend); |
1552 | mlog_errno(ret); | 1564 | if (ret) |
1565 | mlog_errno(ret); | ||
1566 | } | ||
1553 | 1567 | ||
1554 | if (tmpend < end) { | 1568 | if (tmpend < end) { |
1555 | /* | 1569 | /* |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ea47120a85ff..6ad3533940ba 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -1199,14 +1199,24 @@ retry: | |||
1199 | inode_unlock((*ac)->ac_inode); | 1199 | inode_unlock((*ac)->ac_inode); |
1200 | 1200 | ||
1201 | ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); | 1201 | ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); |
1202 | if (ret == 1) | 1202 | if (ret == 1) { |
1203 | iput((*ac)->ac_inode); | ||
1204 | (*ac)->ac_inode = NULL; | ||
1203 | goto retry; | 1205 | goto retry; |
1206 | } | ||
1204 | 1207 | ||
1205 | if (ret < 0) | 1208 | if (ret < 0) |
1206 | mlog_errno(ret); | 1209 | mlog_errno(ret); |
1207 | 1210 | ||
1208 | inode_lock((*ac)->ac_inode); | 1211 | inode_lock((*ac)->ac_inode); |
1209 | ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); | 1212 | ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); |
1213 | if (ret < 0) { | ||
1214 | mlog_errno(ret); | ||
1215 | inode_unlock((*ac)->ac_inode); | ||
1216 | iput((*ac)->ac_inode); | ||
1217 | (*ac)->ac_inode = NULL; | ||
1218 | goto bail; | ||
1219 | } | ||
1210 | } | 1220 | } |
1211 | if (status < 0) { | 1221 | if (status < 0) { |
1212 | if (status != -ENOSPC) | 1222 | if (status != -ENOSPC) |
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 54e5d6681786..43fdc2765aea 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c | |||
@@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) | |||
80 | } | 80 | } |
81 | 81 | ||
82 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { | 82 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { |
83 | if (ovl_is_private_xattr(name)) | ||
84 | continue; | ||
83 | retry: | 85 | retry: |
84 | size = vfs_getxattr(old, name, value, value_size); | 86 | size = vfs_getxattr(old, name, value, value_size); |
85 | if (size == -ERANGE) | 87 | if (size == -ERANGE) |
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 12bcd07b9e32..1560fdc09a5f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/xattr.h> | 12 | #include <linux/xattr.h> |
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/cred.h> | 14 | #include <linux/cred.h> |
15 | #include <linux/posix_acl.h> | ||
16 | #include <linux/posix_acl_xattr.h> | ||
15 | #include "overlayfs.h" | 17 | #include "overlayfs.h" |
16 | 18 | ||
17 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | 19 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) |
@@ -186,6 +188,9 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, | |||
186 | struct dentry *newdentry; | 188 | struct dentry *newdentry; |
187 | int err; | 189 | int err; |
188 | 190 | ||
191 | if (!hardlink && !IS_POSIXACL(udir)) | ||
192 | stat->mode &= ~current_umask(); | ||
193 | |||
189 | inode_lock_nested(udir, I_MUTEX_PARENT); | 194 | inode_lock_nested(udir, I_MUTEX_PARENT); |
190 | newdentry = lookup_one_len(dentry->d_name.name, upperdir, | 195 | newdentry = lookup_one_len(dentry->d_name.name, upperdir, |
191 | dentry->d_name.len); | 196 | dentry->d_name.len); |
@@ -335,6 +340,32 @@ out_free: | |||
335 | return ret; | 340 | return ret; |
336 | } | 341 | } |
337 | 342 | ||
343 | static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, | ||
344 | const struct posix_acl *acl) | ||
345 | { | ||
346 | void *buffer; | ||
347 | size_t size; | ||
348 | int err; | ||
349 | |||
350 | if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl) | ||
351 | return 0; | ||
352 | |||
353 | size = posix_acl_to_xattr(NULL, acl, NULL, 0); | ||
354 | buffer = kmalloc(size, GFP_KERNEL); | ||
355 | if (!buffer) | ||
356 | return -ENOMEM; | ||
357 | |||
358 | size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); | ||
359 | err = size; | ||
360 | if (err < 0) | ||
361 | goto out_free; | ||
362 | |||
363 | err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE); | ||
364 | out_free: | ||
365 | kfree(buffer); | ||
366 | return err; | ||
367 | } | ||
368 | |||
338 | static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, | 369 | static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, |
339 | struct kstat *stat, const char *link, | 370 | struct kstat *stat, const char *link, |
340 | struct dentry *hardlink) | 371 | struct dentry *hardlink) |
@@ -346,10 +377,18 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, | |||
346 | struct dentry *upper; | 377 | struct dentry *upper; |
347 | struct dentry *newdentry; | 378 | struct dentry *newdentry; |
348 | int err; | 379 | int err; |
380 | struct posix_acl *acl, *default_acl; | ||
349 | 381 | ||
350 | if (WARN_ON(!workdir)) | 382 | if (WARN_ON(!workdir)) |
351 | return -EROFS; | 383 | return -EROFS; |
352 | 384 | ||
385 | if (!hardlink) { | ||
386 | err = posix_acl_create(dentry->d_parent->d_inode, | ||
387 | &stat->mode, &default_acl, &acl); | ||
388 | if (err) | ||
389 | return err; | ||
390 | } | ||
391 | |||
353 | err = ovl_lock_rename_workdir(workdir, upperdir); | 392 | err = ovl_lock_rename_workdir(workdir, upperdir); |
354 | if (err) | 393 | if (err) |
355 | goto out; | 394 | goto out; |
@@ -384,6 +423,17 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, | |||
384 | if (err) | 423 | if (err) |
385 | goto out_cleanup; | 424 | goto out_cleanup; |
386 | } | 425 | } |
426 | if (!hardlink) { | ||
427 | err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS, | ||
428 | acl); | ||
429 | if (err) | ||
430 | goto out_cleanup; | ||
431 | |||
432 | err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT, | ||
433 | default_acl); | ||
434 | if (err) | ||
435 | goto out_cleanup; | ||
436 | } | ||
387 | 437 | ||
388 | if (!hardlink && S_ISDIR(stat->mode)) { | 438 | if (!hardlink && S_ISDIR(stat->mode)) { |
389 | err = ovl_set_opaque(newdentry); | 439 | err = ovl_set_opaque(newdentry); |
@@ -410,6 +460,10 @@ out_dput: | |||
410 | out_unlock: | 460 | out_unlock: |
411 | unlock_rename(workdir, upperdir); | 461 | unlock_rename(workdir, upperdir); |
412 | out: | 462 | out: |
463 | if (!hardlink) { | ||
464 | posix_acl_release(acl); | ||
465 | posix_acl_release(default_acl); | ||
466 | } | ||
413 | return err; | 467 | return err; |
414 | 468 | ||
415 | out_cleanup: | 469 | out_cleanup: |
@@ -950,9 +1004,9 @@ const struct inode_operations ovl_dir_inode_operations = { | |||
950 | .permission = ovl_permission, | 1004 | .permission = ovl_permission, |
951 | .getattr = ovl_dir_getattr, | 1005 | .getattr = ovl_dir_getattr, |
952 | .setxattr = generic_setxattr, | 1006 | .setxattr = generic_setxattr, |
953 | .getxattr = ovl_getxattr, | 1007 | .getxattr = generic_getxattr, |
954 | .listxattr = ovl_listxattr, | 1008 | .listxattr = ovl_listxattr, |
955 | .removexattr = ovl_removexattr, | 1009 | .removexattr = generic_removexattr, |
956 | .get_acl = ovl_get_acl, | 1010 | .get_acl = ovl_get_acl, |
957 | .update_time = ovl_update_time, | 1011 | .update_time = ovl_update_time, |
958 | }; | 1012 | }; |
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1b885c156028..c75625c1efa3 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/xattr.h> | 12 | #include <linux/xattr.h> |
13 | #include <linux/posix_acl.h> | ||
13 | #include "overlayfs.h" | 14 | #include "overlayfs.h" |
14 | 15 | ||
15 | static int ovl_copy_up_truncate(struct dentry *dentry) | 16 | static int ovl_copy_up_truncate(struct dentry *dentry) |
@@ -191,32 +192,44 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
191 | return err; | 192 | return err; |
192 | } | 193 | } |
193 | 194 | ||
194 | static bool ovl_is_private_xattr(const char *name) | 195 | bool ovl_is_private_xattr(const char *name) |
195 | { | 196 | { |
196 | #define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "." | 197 | return strncmp(name, OVL_XATTR_PREFIX, |
197 | return strncmp(name, OVL_XATTR_PRE_NAME, | 198 | sizeof(OVL_XATTR_PREFIX) - 1) == 0; |
198 | sizeof(OVL_XATTR_PRE_NAME) - 1) == 0; | ||
199 | } | 199 | } |
200 | 200 | ||
201 | int ovl_setxattr(struct dentry *dentry, struct inode *inode, | 201 | int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, |
202 | const char *name, const void *value, | 202 | size_t size, int flags) |
203 | size_t size, int flags) | ||
204 | { | 203 | { |
205 | int err; | 204 | int err; |
206 | struct dentry *upperdentry; | 205 | struct path realpath; |
206 | enum ovl_path_type type = ovl_path_real(dentry, &realpath); | ||
207 | const struct cred *old_cred; | 207 | const struct cred *old_cred; |
208 | 208 | ||
209 | err = ovl_want_write(dentry); | 209 | err = ovl_want_write(dentry); |
210 | if (err) | 210 | if (err) |
211 | goto out; | 211 | goto out; |
212 | 212 | ||
213 | if (!value && !OVL_TYPE_UPPER(type)) { | ||
214 | err = vfs_getxattr(realpath.dentry, name, NULL, 0); | ||
215 | if (err < 0) | ||
216 | goto out_drop_write; | ||
217 | } | ||
218 | |||
213 | err = ovl_copy_up(dentry); | 219 | err = ovl_copy_up(dentry); |
214 | if (err) | 220 | if (err) |
215 | goto out_drop_write; | 221 | goto out_drop_write; |
216 | 222 | ||
217 | upperdentry = ovl_dentry_upper(dentry); | 223 | if (!OVL_TYPE_UPPER(type)) |
224 | ovl_path_upper(dentry, &realpath); | ||
225 | |||
218 | old_cred = ovl_override_creds(dentry->d_sb); | 226 | old_cred = ovl_override_creds(dentry->d_sb); |
219 | err = vfs_setxattr(upperdentry, name, value, size, flags); | 227 | if (value) |
228 | err = vfs_setxattr(realpath.dentry, name, value, size, flags); | ||
229 | else { | ||
230 | WARN_ON(flags != XATTR_REPLACE); | ||
231 | err = vfs_removexattr(realpath.dentry, name); | ||
232 | } | ||
220 | revert_creds(old_cred); | 233 | revert_creds(old_cred); |
221 | 234 | ||
222 | out_drop_write: | 235 | out_drop_write: |
@@ -225,16 +238,13 @@ out: | |||
225 | return err; | 238 | return err; |
226 | } | 239 | } |
227 | 240 | ||
228 | ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, | 241 | int ovl_xattr_get(struct dentry *dentry, const char *name, |
229 | const char *name, void *value, size_t size) | 242 | void *value, size_t size) |
230 | { | 243 | { |
231 | struct dentry *realdentry = ovl_dentry_real(dentry); | 244 | struct dentry *realdentry = ovl_dentry_real(dentry); |
232 | ssize_t res; | 245 | ssize_t res; |
233 | const struct cred *old_cred; | 246 | const struct cred *old_cred; |
234 | 247 | ||
235 | if (ovl_is_private_xattr(name)) | ||
236 | return -ENODATA; | ||
237 | |||
238 | old_cred = ovl_override_creds(dentry->d_sb); | 248 | old_cred = ovl_override_creds(dentry->d_sb); |
239 | res = vfs_getxattr(realdentry, name, value, size); | 249 | res = vfs_getxattr(realdentry, name, value, size); |
240 | revert_creds(old_cred); | 250 | revert_creds(old_cred); |
@@ -245,7 +255,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) | |||
245 | { | 255 | { |
246 | struct dentry *realdentry = ovl_dentry_real(dentry); | 256 | struct dentry *realdentry = ovl_dentry_real(dentry); |
247 | ssize_t res; | 257 | ssize_t res; |
248 | int off; | 258 | size_t len; |
259 | char *s; | ||
249 | const struct cred *old_cred; | 260 | const struct cred *old_cred; |
250 | 261 | ||
251 | old_cred = ovl_override_creds(dentry->d_sb); | 262 | old_cred = ovl_override_creds(dentry->d_sb); |
@@ -255,73 +266,39 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) | |||
255 | return res; | 266 | return res; |
256 | 267 | ||
257 | /* filter out private xattrs */ | 268 | /* filter out private xattrs */ |
258 | for (off = 0; off < res;) { | 269 | for (s = list, len = res; len;) { |
259 | char *s = list + off; | 270 | size_t slen = strnlen(s, len) + 1; |
260 | size_t slen = strlen(s) + 1; | ||
261 | 271 | ||
262 | BUG_ON(off + slen > res); | 272 | /* underlying fs providing us with an broken xattr list? */ |
273 | if (WARN_ON(slen > len)) | ||
274 | return -EIO; | ||
263 | 275 | ||
276 | len -= slen; | ||
264 | if (ovl_is_private_xattr(s)) { | 277 | if (ovl_is_private_xattr(s)) { |
265 | res -= slen; | 278 | res -= slen; |
266 | memmove(s, s + slen, res - off); | 279 | memmove(s, s + slen, len); |
267 | } else { | 280 | } else { |
268 | off += slen; | 281 | s += slen; |
269 | } | 282 | } |
270 | } | 283 | } |
271 | 284 | ||
272 | return res; | 285 | return res; |
273 | } | 286 | } |
274 | 287 | ||
275 | int ovl_removexattr(struct dentry *dentry, const char *name) | ||
276 | { | ||
277 | int err; | ||
278 | struct path realpath; | ||
279 | enum ovl_path_type type = ovl_path_real(dentry, &realpath); | ||
280 | const struct cred *old_cred; | ||
281 | |||
282 | err = ovl_want_write(dentry); | ||
283 | if (err) | ||
284 | goto out; | ||
285 | |||
286 | err = -ENODATA; | ||
287 | if (ovl_is_private_xattr(name)) | ||
288 | goto out_drop_write; | ||
289 | |||
290 | if (!OVL_TYPE_UPPER(type)) { | ||
291 | err = vfs_getxattr(realpath.dentry, name, NULL, 0); | ||
292 | if (err < 0) | ||
293 | goto out_drop_write; | ||
294 | |||
295 | err = ovl_copy_up(dentry); | ||
296 | if (err) | ||
297 | goto out_drop_write; | ||
298 | |||
299 | ovl_path_upper(dentry, &realpath); | ||
300 | } | ||
301 | |||
302 | old_cred = ovl_override_creds(dentry->d_sb); | ||
303 | err = vfs_removexattr(realpath.dentry, name); | ||
304 | revert_creds(old_cred); | ||
305 | out_drop_write: | ||
306 | ovl_drop_write(dentry); | ||
307 | out: | ||
308 | return err; | ||
309 | } | ||
310 | |||
311 | struct posix_acl *ovl_get_acl(struct inode *inode, int type) | 288 | struct posix_acl *ovl_get_acl(struct inode *inode, int type) |
312 | { | 289 | { |
313 | struct inode *realinode = ovl_inode_real(inode, NULL); | 290 | struct inode *realinode = ovl_inode_real(inode, NULL); |
314 | const struct cred *old_cred; | 291 | const struct cred *old_cred; |
315 | struct posix_acl *acl; | 292 | struct posix_acl *acl; |
316 | 293 | ||
317 | if (!IS_POSIXACL(realinode)) | 294 | if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) |
318 | return NULL; | 295 | return NULL; |
319 | 296 | ||
320 | if (!realinode->i_op->get_acl) | 297 | if (!realinode->i_op->get_acl) |
321 | return NULL; | 298 | return NULL; |
322 | 299 | ||
323 | old_cred = ovl_override_creds(inode->i_sb); | 300 | old_cred = ovl_override_creds(inode->i_sb); |
324 | acl = realinode->i_op->get_acl(realinode, type); | 301 | acl = get_acl(realinode, type); |
325 | revert_creds(old_cred); | 302 | revert_creds(old_cred); |
326 | 303 | ||
327 | return acl; | 304 | return acl; |
@@ -391,9 +368,9 @@ static const struct inode_operations ovl_file_inode_operations = { | |||
391 | .permission = ovl_permission, | 368 | .permission = ovl_permission, |
392 | .getattr = ovl_getattr, | 369 | .getattr = ovl_getattr, |
393 | .setxattr = generic_setxattr, | 370 | .setxattr = generic_setxattr, |
394 | .getxattr = ovl_getxattr, | 371 | .getxattr = generic_getxattr, |
395 | .listxattr = ovl_listxattr, | 372 | .listxattr = ovl_listxattr, |
396 | .removexattr = ovl_removexattr, | 373 | .removexattr = generic_removexattr, |
397 | .get_acl = ovl_get_acl, | 374 | .get_acl = ovl_get_acl, |
398 | .update_time = ovl_update_time, | 375 | .update_time = ovl_update_time, |
399 | }; | 376 | }; |
@@ -404,9 +381,9 @@ static const struct inode_operations ovl_symlink_inode_operations = { | |||
404 | .readlink = ovl_readlink, | 381 | .readlink = ovl_readlink, |
405 | .getattr = ovl_getattr, | 382 | .getattr = ovl_getattr, |
406 | .setxattr = generic_setxattr, | 383 | .setxattr = generic_setxattr, |
407 | .getxattr = ovl_getxattr, | 384 | .getxattr = generic_getxattr, |
408 | .listxattr = ovl_listxattr, | 385 | .listxattr = ovl_listxattr, |
409 | .removexattr = ovl_removexattr, | 386 | .removexattr = generic_removexattr, |
410 | .update_time = ovl_update_time, | 387 | .update_time = ovl_update_time, |
411 | }; | 388 | }; |
412 | 389 | ||
@@ -415,6 +392,9 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode) | |||
415 | inode->i_ino = get_next_ino(); | 392 | inode->i_ino = get_next_ino(); |
416 | inode->i_mode = mode; | 393 | inode->i_mode = mode; |
417 | inode->i_flags |= S_NOCMTIME; | 394 | inode->i_flags |= S_NOCMTIME; |
395 | #ifdef CONFIG_FS_POSIX_ACL | ||
396 | inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; | ||
397 | #endif | ||
418 | 398 | ||
419 | mode &= S_IFMT; | 399 | mode &= S_IFMT; |
420 | switch (mode) { | 400 | switch (mode) { |
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e4f5c9536bfe..5813ccff8cd9 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h | |||
@@ -24,8 +24,8 @@ enum ovl_path_type { | |||
24 | (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) | 24 | (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) |
25 | 25 | ||
26 | 26 | ||
27 | #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay" | 27 | #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." |
28 | #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque" | 28 | #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" |
29 | 29 | ||
30 | #define OVL_ISUPPER_MASK 1UL | 30 | #define OVL_ISUPPER_MASK 1UL |
31 | 31 | ||
@@ -179,20 +179,21 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); | |||
179 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); | 179 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); |
180 | void ovl_cache_free(struct list_head *list); | 180 | void ovl_cache_free(struct list_head *list); |
181 | int ovl_check_d_type_supported(struct path *realpath); | 181 | int ovl_check_d_type_supported(struct path *realpath); |
182 | void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, | ||
183 | struct dentry *dentry, int level); | ||
182 | 184 | ||
183 | /* inode.c */ | 185 | /* inode.c */ |
184 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); | 186 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); |
185 | int ovl_permission(struct inode *inode, int mask); | 187 | int ovl_permission(struct inode *inode, int mask); |
186 | int ovl_setxattr(struct dentry *dentry, struct inode *inode, | 188 | int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, |
187 | const char *name, const void *value, | 189 | size_t size, int flags); |
188 | size_t size, int flags); | 190 | int ovl_xattr_get(struct dentry *dentry, const char *name, |
189 | ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, | 191 | void *value, size_t size); |
190 | const char *name, void *value, size_t size); | ||
191 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | 192 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); |
192 | int ovl_removexattr(struct dentry *dentry, const char *name); | ||
193 | struct posix_acl *ovl_get_acl(struct inode *inode, int type); | 193 | struct posix_acl *ovl_get_acl(struct inode *inode, int type); |
194 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); | 194 | int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); |
195 | int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); | 195 | int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); |
196 | bool ovl_is_private_xattr(const char *name); | ||
196 | 197 | ||
197 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode); | 198 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode); |
198 | struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); | 199 | struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); |
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cf37fc76fc9f..f241b4ee3d8a 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c | |||
@@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath, | |||
248 | err = rdd->err; | 248 | err = rdd->err; |
249 | } while (!err && rdd->count); | 249 | } while (!err && rdd->count); |
250 | 250 | ||
251 | if (!err && rdd->first_maybe_whiteout) | 251 | if (!err && rdd->first_maybe_whiteout && rdd->dentry) |
252 | err = ovl_check_whiteouts(realpath->dentry, rdd); | 252 | err = ovl_check_whiteouts(realpath->dentry, rdd); |
253 | 253 | ||
254 | fput(realfile); | 254 | fput(realfile); |
@@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath) | |||
606 | 606 | ||
607 | return rdd.d_type_supported; | 607 | return rdd.d_type_supported; |
608 | } | 608 | } |
609 | |||
610 | static void ovl_workdir_cleanup_recurse(struct path *path, int level) | ||
611 | { | ||
612 | int err; | ||
613 | struct inode *dir = path->dentry->d_inode; | ||
614 | LIST_HEAD(list); | ||
615 | struct ovl_cache_entry *p; | ||
616 | struct ovl_readdir_data rdd = { | ||
617 | .ctx.actor = ovl_fill_merge, | ||
618 | .dentry = NULL, | ||
619 | .list = &list, | ||
620 | .root = RB_ROOT, | ||
621 | .is_lowest = false, | ||
622 | }; | ||
623 | |||
624 | err = ovl_dir_read(path, &rdd); | ||
625 | if (err) | ||
626 | goto out; | ||
627 | |||
628 | inode_lock_nested(dir, I_MUTEX_PARENT); | ||
629 | list_for_each_entry(p, &list, l_node) { | ||
630 | struct dentry *dentry; | ||
631 | |||
632 | if (p->name[0] == '.') { | ||
633 | if (p->len == 1) | ||
634 | continue; | ||
635 | if (p->len == 2 && p->name[1] == '.') | ||
636 | continue; | ||
637 | } | ||
638 | dentry = lookup_one_len(p->name, path->dentry, p->len); | ||
639 | if (IS_ERR(dentry)) | ||
640 | continue; | ||
641 | if (dentry->d_inode) | ||
642 | ovl_workdir_cleanup(dir, path->mnt, dentry, level); | ||
643 | dput(dentry); | ||
644 | } | ||
645 | inode_unlock(dir); | ||
646 | out: | ||
647 | ovl_cache_free(&list); | ||
648 | } | ||
649 | |||
650 | void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, | ||
651 | struct dentry *dentry, int level) | ||
652 | { | ||
653 | int err; | ||
654 | |||
655 | if (!d_is_dir(dentry) || level > 1) { | ||
656 | ovl_cleanup(dir, dentry); | ||
657 | return; | ||
658 | } | ||
659 | |||
660 | err = ovl_do_rmdir(dir, dentry); | ||
661 | if (err) { | ||
662 | struct path path = { .mnt = mnt, .dentry = dentry }; | ||
663 | |||
664 | inode_unlock(dir); | ||
665 | ovl_workdir_cleanup_recurse(&path, level + 1); | ||
666 | inode_lock_nested(dir, I_MUTEX_PARENT); | ||
667 | ovl_cleanup(dir, dentry); | ||
668 | } | ||
669 | } | ||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4036132842b5..e2a94a26767b 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c | |||
@@ -814,6 +814,10 @@ retry: | |||
814 | struct kstat stat = { | 814 | struct kstat stat = { |
815 | .mode = S_IFDIR | 0, | 815 | .mode = S_IFDIR | 0, |
816 | }; | 816 | }; |
817 | struct iattr attr = { | ||
818 | .ia_valid = ATTR_MODE, | ||
819 | .ia_mode = stat.mode, | ||
820 | }; | ||
817 | 821 | ||
818 | if (work->d_inode) { | 822 | if (work->d_inode) { |
819 | err = -EEXIST; | 823 | err = -EEXIST; |
@@ -821,7 +825,7 @@ retry: | |||
821 | goto out_dput; | 825 | goto out_dput; |
822 | 826 | ||
823 | retried = true; | 827 | retried = true; |
824 | ovl_cleanup(dir, work); | 828 | ovl_workdir_cleanup(dir, mnt, work, 0); |
825 | dput(work); | 829 | dput(work); |
826 | goto retry; | 830 | goto retry; |
827 | } | 831 | } |
@@ -829,6 +833,21 @@ retry: | |||
829 | err = ovl_create_real(dir, work, &stat, NULL, NULL, true); | 833 | err = ovl_create_real(dir, work, &stat, NULL, NULL, true); |
830 | if (err) | 834 | if (err) |
831 | goto out_dput; | 835 | goto out_dput; |
836 | |||
837 | err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); | ||
838 | if (err && err != -ENODATA && err != -EOPNOTSUPP) | ||
839 | goto out_dput; | ||
840 | |||
841 | err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); | ||
842 | if (err && err != -ENODATA && err != -EOPNOTSUPP) | ||
843 | goto out_dput; | ||
844 | |||
845 | /* Clear any inherited mode bits */ | ||
846 | inode_lock(work->d_inode); | ||
847 | err = notify_change(work, &attr, NULL); | ||
848 | inode_unlock(work->d_inode); | ||
849 | if (err) | ||
850 | goto out_dput; | ||
832 | } | 851 | } |
833 | out_unlock: | 852 | out_unlock: |
834 | inode_unlock(dir); | 853 | inode_unlock(dir); |
@@ -967,10 +986,19 @@ static unsigned int ovl_split_lowerdirs(char *str) | |||
967 | return ctr; | 986 | return ctr; |
968 | } | 987 | } |
969 | 988 | ||
970 | static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler, | 989 | static int __maybe_unused |
971 | struct dentry *dentry, struct inode *inode, | 990 | ovl_posix_acl_xattr_get(const struct xattr_handler *handler, |
972 | const char *name, const void *value, | 991 | struct dentry *dentry, struct inode *inode, |
973 | size_t size, int flags) | 992 | const char *name, void *buffer, size_t size) |
993 | { | ||
994 | return ovl_xattr_get(dentry, handler->name, buffer, size); | ||
995 | } | ||
996 | |||
997 | static int __maybe_unused | ||
998 | ovl_posix_acl_xattr_set(const struct xattr_handler *handler, | ||
999 | struct dentry *dentry, struct inode *inode, | ||
1000 | const char *name, const void *value, | ||
1001 | size_t size, int flags) | ||
974 | { | 1002 | { |
975 | struct dentry *workdir = ovl_workdir(dentry); | 1003 | struct dentry *workdir = ovl_workdir(dentry); |
976 | struct inode *realinode = ovl_inode_real(inode, NULL); | 1004 | struct inode *realinode = ovl_inode_real(inode, NULL); |
@@ -998,19 +1026,22 @@ static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler, | |||
998 | 1026 | ||
999 | posix_acl_release(acl); | 1027 | posix_acl_release(acl); |
1000 | 1028 | ||
1001 | return ovl_setxattr(dentry, inode, handler->name, value, size, flags); | 1029 | err = ovl_xattr_set(dentry, handler->name, value, size, flags); |
1030 | if (!err) | ||
1031 | ovl_copyattr(ovl_inode_real(inode, NULL), inode); | ||
1032 | |||
1033 | return err; | ||
1002 | 1034 | ||
1003 | out_acl_release: | 1035 | out_acl_release: |
1004 | posix_acl_release(acl); | 1036 | posix_acl_release(acl); |
1005 | return err; | 1037 | return err; |
1006 | } | 1038 | } |
1007 | 1039 | ||
1008 | static int ovl_other_xattr_set(const struct xattr_handler *handler, | 1040 | static int ovl_own_xattr_get(const struct xattr_handler *handler, |
1009 | struct dentry *dentry, struct inode *inode, | 1041 | struct dentry *dentry, struct inode *inode, |
1010 | const char *name, const void *value, | 1042 | const char *name, void *buffer, size_t size) |
1011 | size_t size, int flags) | ||
1012 | { | 1043 | { |
1013 | return ovl_setxattr(dentry, inode, name, value, size, flags); | 1044 | return -EPERM; |
1014 | } | 1045 | } |
1015 | 1046 | ||
1016 | static int ovl_own_xattr_set(const struct xattr_handler *handler, | 1047 | static int ovl_own_xattr_set(const struct xattr_handler *handler, |
@@ -1021,42 +1052,59 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, | |||
1021 | return -EPERM; | 1052 | return -EPERM; |
1022 | } | 1053 | } |
1023 | 1054 | ||
1024 | static const struct xattr_handler ovl_posix_acl_access_xattr_handler = { | 1055 | static int ovl_other_xattr_get(const struct xattr_handler *handler, |
1056 | struct dentry *dentry, struct inode *inode, | ||
1057 | const char *name, void *buffer, size_t size) | ||
1058 | { | ||
1059 | return ovl_xattr_get(dentry, name, buffer, size); | ||
1060 | } | ||
1061 | |||
1062 | static int ovl_other_xattr_set(const struct xattr_handler *handler, | ||
1063 | struct dentry *dentry, struct inode *inode, | ||
1064 | const char *name, const void *value, | ||
1065 | size_t size, int flags) | ||
1066 | { | ||
1067 | return ovl_xattr_set(dentry, name, value, size, flags); | ||
1068 | } | ||
1069 | |||
1070 | static const struct xattr_handler __maybe_unused | ||
1071 | ovl_posix_acl_access_xattr_handler = { | ||
1025 | .name = XATTR_NAME_POSIX_ACL_ACCESS, | 1072 | .name = XATTR_NAME_POSIX_ACL_ACCESS, |
1026 | .flags = ACL_TYPE_ACCESS, | 1073 | .flags = ACL_TYPE_ACCESS, |
1074 | .get = ovl_posix_acl_xattr_get, | ||
1027 | .set = ovl_posix_acl_xattr_set, | 1075 | .set = ovl_posix_acl_xattr_set, |
1028 | }; | 1076 | }; |
1029 | 1077 | ||
1030 | static const struct xattr_handler ovl_posix_acl_default_xattr_handler = { | 1078 | static const struct xattr_handler __maybe_unused |
1079 | ovl_posix_acl_default_xattr_handler = { | ||
1031 | .name = XATTR_NAME_POSIX_ACL_DEFAULT, | 1080 | .name = XATTR_NAME_POSIX_ACL_DEFAULT, |
1032 | .flags = ACL_TYPE_DEFAULT, | 1081 | .flags = ACL_TYPE_DEFAULT, |
1082 | .get = ovl_posix_acl_xattr_get, | ||
1033 | .set = ovl_posix_acl_xattr_set, | 1083 | .set = ovl_posix_acl_xattr_set, |
1034 | }; | 1084 | }; |
1035 | 1085 | ||
1036 | static const struct xattr_handler ovl_own_xattr_handler = { | 1086 | static const struct xattr_handler ovl_own_xattr_handler = { |
1037 | .prefix = OVL_XATTR_PREFIX, | 1087 | .prefix = OVL_XATTR_PREFIX, |
1088 | .get = ovl_own_xattr_get, | ||
1038 | .set = ovl_own_xattr_set, | 1089 | .set = ovl_own_xattr_set, |
1039 | }; | 1090 | }; |
1040 | 1091 | ||
1041 | static const struct xattr_handler ovl_other_xattr_handler = { | 1092 | static const struct xattr_handler ovl_other_xattr_handler = { |
1042 | .prefix = "", /* catch all */ | 1093 | .prefix = "", /* catch all */ |
1094 | .get = ovl_other_xattr_get, | ||
1043 | .set = ovl_other_xattr_set, | 1095 | .set = ovl_other_xattr_set, |
1044 | }; | 1096 | }; |
1045 | 1097 | ||
1046 | static const struct xattr_handler *ovl_xattr_handlers[] = { | 1098 | static const struct xattr_handler *ovl_xattr_handlers[] = { |
1099 | #ifdef CONFIG_FS_POSIX_ACL | ||
1047 | &ovl_posix_acl_access_xattr_handler, | 1100 | &ovl_posix_acl_access_xattr_handler, |
1048 | &ovl_posix_acl_default_xattr_handler, | 1101 | &ovl_posix_acl_default_xattr_handler, |
1102 | #endif | ||
1049 | &ovl_own_xattr_handler, | 1103 | &ovl_own_xattr_handler, |
1050 | &ovl_other_xattr_handler, | 1104 | &ovl_other_xattr_handler, |
1051 | NULL | 1105 | NULL |
1052 | }; | 1106 | }; |
1053 | 1107 | ||
1054 | static const struct xattr_handler *ovl_xattr_noacl_handlers[] = { | ||
1055 | &ovl_own_xattr_handler, | ||
1056 | &ovl_other_xattr_handler, | ||
1057 | NULL, | ||
1058 | }; | ||
1059 | |||
1060 | static int ovl_fill_super(struct super_block *sb, void *data, int silent) | 1108 | static int ovl_fill_super(struct super_block *sb, void *data, int silent) |
1061 | { | 1109 | { |
1062 | struct path upperpath = { NULL, NULL }; | 1110 | struct path upperpath = { NULL, NULL }; |
@@ -1132,7 +1180,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1132 | err = -EINVAL; | 1180 | err = -EINVAL; |
1133 | stacklen = ovl_split_lowerdirs(lowertmp); | 1181 | stacklen = ovl_split_lowerdirs(lowertmp); |
1134 | if (stacklen > OVL_MAX_STACK) { | 1182 | if (stacklen > OVL_MAX_STACK) { |
1135 | pr_err("overlayfs: too many lower directries, limit is %d\n", | 1183 | pr_err("overlayfs: too many lower directories, limit is %d\n", |
1136 | OVL_MAX_STACK); | 1184 | OVL_MAX_STACK); |
1137 | goto out_free_lowertmp; | 1185 | goto out_free_lowertmp; |
1138 | } else if (!ufs->config.upperdir && stacklen == 1) { | 1186 | } else if (!ufs->config.upperdir && stacklen == 1) { |
@@ -1269,10 +1317,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
1269 | 1317 | ||
1270 | sb->s_magic = OVERLAYFS_SUPER_MAGIC; | 1318 | sb->s_magic = OVERLAYFS_SUPER_MAGIC; |
1271 | sb->s_op = &ovl_super_operations; | 1319 | sb->s_op = &ovl_super_operations; |
1272 | if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) | 1320 | sb->s_xattr = ovl_xattr_handlers; |
1273 | sb->s_xattr = ovl_xattr_handlers; | ||
1274 | else | ||
1275 | sb->s_xattr = ovl_xattr_noacl_handlers; | ||
1276 | sb->s_root = root_dentry; | 1321 | sb->s_root = root_dentry; |
1277 | sb->s_fs_info = ufs; | 1322 | sb->s_fs_info = ufs; |
1278 | sb->s_flags |= MS_POSIXACL; | 1323 | sb->s_flags |= MS_POSIXACL; |
@@ -144,10 +144,8 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
144 | struct page *page = buf->page; | 144 | struct page *page = buf->page; |
145 | 145 | ||
146 | if (page_count(page) == 1) { | 146 | if (page_count(page) == 1) { |
147 | if (memcg_kmem_enabled()) { | 147 | if (memcg_kmem_enabled()) |
148 | memcg_kmem_uncharge(page, 0); | 148 | memcg_kmem_uncharge(page, 0); |
149 | __ClearPageKmemcg(page); | ||
150 | } | ||
151 | __SetPageLocked(page); | 149 | __SetPageLocked(page); |
152 | return 0; | 150 | return 0; |
153 | } | 151 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 54e270262979..ac0df4dde823 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1556,18 +1556,13 @@ static const struct file_operations proc_pid_set_comm_operations = { | |||
1556 | static int proc_exe_link(struct dentry *dentry, struct path *exe_path) | 1556 | static int proc_exe_link(struct dentry *dentry, struct path *exe_path) |
1557 | { | 1557 | { |
1558 | struct task_struct *task; | 1558 | struct task_struct *task; |
1559 | struct mm_struct *mm; | ||
1560 | struct file *exe_file; | 1559 | struct file *exe_file; |
1561 | 1560 | ||
1562 | task = get_proc_task(d_inode(dentry)); | 1561 | task = get_proc_task(d_inode(dentry)); |
1563 | if (!task) | 1562 | if (!task) |
1564 | return -ENOENT; | 1563 | return -ENOENT; |
1565 | mm = get_task_mm(task); | 1564 | exe_file = get_task_exe_file(task); |
1566 | put_task_struct(task); | 1565 | put_task_struct(task); |
1567 | if (!mm) | ||
1568 | return -ENOENT; | ||
1569 | exe_file = get_mm_exe_file(mm); | ||
1570 | mmput(mm); | ||
1571 | if (exe_file) { | 1566 | if (exe_file) { |
1572 | *exe_path = exe_file->f_path; | 1567 | *exe_path = exe_file->f_path; |
1573 | path_get(&exe_file->f_path); | 1568 | path_get(&exe_file->f_path); |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index a939f5ed7f89..5c89a07e3d7f 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
430 | static ssize_t | 430 | static ssize_t |
431 | read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | 431 | read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) |
432 | { | 432 | { |
433 | char *buf = file->private_data; | ||
433 | ssize_t acc = 0; | 434 | ssize_t acc = 0; |
434 | size_t size, tsz; | 435 | size_t size, tsz; |
435 | size_t elf_buflen; | 436 | size_t elf_buflen; |
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
500 | if (clear_user(buffer, tsz)) | 501 | if (clear_user(buffer, tsz)) |
501 | return -EFAULT; | 502 | return -EFAULT; |
502 | } else if (is_vmalloc_or_module_addr((void *)start)) { | 503 | } else if (is_vmalloc_or_module_addr((void *)start)) { |
503 | char * elf_buf; | 504 | vread(buf, (char *)start, tsz); |
504 | |||
505 | elf_buf = kzalloc(tsz, GFP_KERNEL); | ||
506 | if (!elf_buf) | ||
507 | return -ENOMEM; | ||
508 | vread(elf_buf, (char *)start, tsz); | ||
509 | /* we have to zero-fill user buffer even if no read */ | 505 | /* we have to zero-fill user buffer even if no read */ |
510 | if (copy_to_user(buffer, elf_buf, tsz)) { | 506 | if (copy_to_user(buffer, buf, tsz)) |
511 | kfree(elf_buf); | ||
512 | return -EFAULT; | 507 | return -EFAULT; |
513 | } | ||
514 | kfree(elf_buf); | ||
515 | } else { | 508 | } else { |
516 | if (kern_addr_valid(start)) { | 509 | if (kern_addr_valid(start)) { |
517 | unsigned long n; | 510 | unsigned long n; |
518 | 511 | ||
519 | n = copy_to_user(buffer, (char *)start, tsz); | 512 | /* |
513 | * Using bounce buffer to bypass the | ||
514 | * hardened user copy kernel text checks. | ||
515 | */ | ||
516 | memcpy(buf, (char *) start, tsz); | ||
517 | n = copy_to_user(buffer, buf, tsz); | ||
520 | /* | 518 | /* |
521 | * We cannot distinguish between fault on source | 519 | * We cannot distinguish between fault on source |
522 | * and fault on destination. When this happens | 520 | * and fault on destination. When this happens |
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp) | |||
549 | { | 547 | { |
550 | if (!capable(CAP_SYS_RAWIO)) | 548 | if (!capable(CAP_SYS_RAWIO)) |
551 | return -EPERM; | 549 | return -EPERM; |
550 | |||
551 | filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
552 | if (!filp->private_data) | ||
553 | return -ENOMEM; | ||
554 | |||
552 | if (kcore_need_update) | 555 | if (kcore_need_update) |
553 | kcore_update_ram(); | 556 | kcore_update_ram(); |
554 | if (i_size_read(inode) != proc_root_kcore->size) { | 557 | if (i_size_read(inode) != proc_root_kcore->size) { |
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp) | |||
559 | return 0; | 562 | return 0; |
560 | } | 563 | } |
561 | 564 | ||
565 | static int release_kcore(struct inode *inode, struct file *file) | ||
566 | { | ||
567 | kfree(file->private_data); | ||
568 | return 0; | ||
569 | } | ||
562 | 570 | ||
563 | static const struct file_operations proc_kcore_operations = { | 571 | static const struct file_operations proc_kcore_operations = { |
564 | .read = read_kcore, | 572 | .read = read_kcore, |
565 | .open = open_kcore, | 573 | .open = open_kcore, |
574 | .release = release_kcore, | ||
566 | .llseek = default_llseek, | 575 | .llseek = default_llseek, |
567 | }; | 576 | }; |
568 | 577 | ||
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 09e18fdf61e5..b9a8c813e5e6 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -46,7 +46,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
46 | cached = 0; | 46 | cached = 0; |
47 | 47 | ||
48 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) | 48 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) |
49 | pages[lru] = global_page_state(NR_LRU_BASE + lru); | 49 | pages[lru] = global_node_page_state(NR_LRU_BASE + lru); |
50 | 50 | ||
51 | available = si_mem_available(); | 51 | available = si_mem_available(); |
52 | 52 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 187d84ef9de9..f6fa99eca515 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -581,6 +581,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, | |||
581 | mss->anonymous_thp += HPAGE_PMD_SIZE; | 581 | mss->anonymous_thp += HPAGE_PMD_SIZE; |
582 | else if (PageSwapBacked(page)) | 582 | else if (PageSwapBacked(page)) |
583 | mss->shmem_thp += HPAGE_PMD_SIZE; | 583 | mss->shmem_thp += HPAGE_PMD_SIZE; |
584 | else if (is_zone_device_page(page)) | ||
585 | /* pass */; | ||
584 | else | 586 | else |
585 | VM_BUG_ON_PAGE(1, page); | 587 | VM_BUG_ON_PAGE(1, page); |
586 | smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); | 588 | smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 183a212694bf..12af0490322f 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c | |||
@@ -27,9 +27,17 @@ | |||
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/ramfs.h> | 29 | #include <linux/ramfs.h> |
30 | #include <linux/sched.h> | ||
30 | 31 | ||
31 | #include "internal.h" | 32 | #include "internal.h" |
32 | 33 | ||
34 | static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, | ||
35 | unsigned long addr, unsigned long len, unsigned long pgoff, | ||
36 | unsigned long flags) | ||
37 | { | ||
38 | return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); | ||
39 | } | ||
40 | |||
33 | const struct file_operations ramfs_file_operations = { | 41 | const struct file_operations ramfs_file_operations = { |
34 | .read_iter = generic_file_read_iter, | 42 | .read_iter = generic_file_read_iter, |
35 | .write_iter = generic_file_write_iter, | 43 | .write_iter = generic_file_write_iter, |
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = { | |||
38 | .splice_read = generic_file_splice_read, | 46 | .splice_read = generic_file_splice_read, |
39 | .splice_write = iter_file_splice_write, | 47 | .splice_write = iter_file_splice_write, |
40 | .llseek = generic_file_llseek, | 48 | .llseek = generic_file_llseek, |
49 | .get_unmapped_area = ramfs_mmu_get_unmapped_area, | ||
41 | }; | 50 | }; |
42 | 51 | ||
43 | const struct inode_operations ramfs_file_inode_operations = { | 52 | const struct inode_operations ramfs_file_inode_operations = { |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 19f532e7d35e..6dc4296eed62 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
223 | size -= n; | 223 | size -= n; |
224 | buf += n; | 224 | buf += n; |
225 | copied += n; | 225 | copied += n; |
226 | if (!m->count) | 226 | if (!m->count) { |
227 | m->from = 0; | ||
227 | m->index++; | 228 | m->index++; |
229 | } | ||
228 | if (!size) | 230 | if (!size) |
229 | goto Done; | 231 | goto Done; |
230 | } | 232 | } |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index f35523d4fa3a..b803213d1307 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, | |||
114 | * If buf != of->prealloc_buf, we don't know how | 114 | * If buf != of->prealloc_buf, we don't know how |
115 | * large it is, so cannot safely pass it to ->show | 115 | * large it is, so cannot safely pass it to ->show |
116 | */ | 116 | */ |
117 | if (pos || WARN_ON_ONCE(buf != of->prealloc_buf)) | 117 | if (WARN_ON_ONCE(buf != of->prealloc_buf)) |
118 | return 0; | 118 | return 0; |
119 | len = ops->show(kobj, of->kn->priv, buf); | 119 | len = ops->show(kobj, of->kn->priv, buf); |
120 | if (pos) { | ||
121 | if (len <= pos) | ||
122 | return 0; | ||
123 | len -= pos; | ||
124 | memmove(buf, buf + pos, len); | ||
125 | } | ||
120 | return min(count, len); | 126 | return min(count, len); |
121 | } | 127 | } |
122 | 128 | ||
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index b45345d701e7..51157da3f76e 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c | |||
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) | |||
370 | 370 | ||
371 | p = c->gap_lebs; | 371 | p = c->gap_lebs; |
372 | do { | 372 | do { |
373 | ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); | 373 | ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs); |
374 | written = layout_leb_in_gaps(c, p); | 374 | written = layout_leb_in_gaps(c, p); |
375 | if (written < 0) { | 375 | if (written < 0) { |
376 | err = written; | 376 | err = written; |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index e237811f09ce..11a004114eba 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler, | |||
575 | dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, | 575 | dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, |
576 | inode->i_ino, dentry, size); | 576 | inode->i_ino, dentry, size); |
577 | 577 | ||
578 | return __ubifs_getxattr(inode, name, buffer, size); | 578 | name = xattr_full_name(handler, name); |
579 | return __ubifs_getxattr(inode, name, buffer, size); | ||
579 | } | 580 | } |
580 | 581 | ||
581 | static int ubifs_xattr_set(const struct xattr_handler *handler, | 582 | static int ubifs_xattr_set(const struct xattr_handler *handler, |
@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, | |||
586 | dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", | 587 | dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", |
587 | name, inode->i_ino, dentry, size); | 588 | name, inode->i_ino, dentry, size); |
588 | 589 | ||
590 | name = xattr_full_name(handler, name); | ||
591 | |||
589 | if (value) | 592 | if (value) |
590 | return __ubifs_setxattr(inode, name, value, size, flags); | 593 | return __ubifs_setxattr(inode, name, value, size, flags); |
591 | else | 594 | else |
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 776ae2f325d1..05b5243d89f6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small( | |||
1582 | xfs_extlen_t *flenp, /* result length */ | 1582 | xfs_extlen_t *flenp, /* result length */ |
1583 | int *stat) /* status: 0-freelist, 1-normal/none */ | 1583 | int *stat) /* status: 0-freelist, 1-normal/none */ |
1584 | { | 1584 | { |
1585 | struct xfs_owner_info oinfo; | ||
1585 | int error; | 1586 | int error; |
1586 | xfs_agblock_t fbno; | 1587 | xfs_agblock_t fbno; |
1587 | xfs_extlen_t flen; | 1588 | xfs_extlen_t flen; |
@@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small( | |||
1624 | error0); | 1625 | error0); |
1625 | args->wasfromfl = 1; | 1626 | args->wasfromfl = 1; |
1626 | trace_xfs_alloc_small_freelist(args); | 1627 | trace_xfs_alloc_small_freelist(args); |
1628 | |||
1629 | /* | ||
1630 | * If we're feeding an AGFL block to something that | ||
1631 | * doesn't live in the free space, we need to clear | ||
1632 | * out the OWN_AG rmap. | ||
1633 | */ | ||
1634 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); | ||
1635 | error = xfs_rmap_free(args->tp, args->agbp, args->agno, | ||
1636 | fbno, 1, &oinfo); | ||
1637 | if (error) | ||
1638 | goto error0; | ||
1639 | |||
1627 | *stat = 0; | 1640 | *stat = 0; |
1628 | return 0; | 1641 | return 0; |
1629 | } | 1642 | } |
@@ -2264,6 +2277,9 @@ xfs_alloc_log_agf( | |||
2264 | offsetof(xfs_agf_t, agf_longest), | 2277 | offsetof(xfs_agf_t, agf_longest), |
2265 | offsetof(xfs_agf_t, agf_btreeblks), | 2278 | offsetof(xfs_agf_t, agf_btreeblks), |
2266 | offsetof(xfs_agf_t, agf_uuid), | 2279 | offsetof(xfs_agf_t, agf_uuid), |
2280 | offsetof(xfs_agf_t, agf_rmap_blocks), | ||
2281 | /* needed so that we don't log the whole rest of the structure: */ | ||
2282 | offsetof(xfs_agf_t, agf_spare64), | ||
2267 | sizeof(xfs_agf_t) | 2283 | sizeof(xfs_agf_t) |
2268 | }; | 2284 | }; |
2269 | 2285 | ||
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b5c213a051cd..08569792fe20 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -1814,6 +1814,10 @@ xfs_btree_lookup( | |||
1814 | 1814 | ||
1815 | XFS_BTREE_STATS_INC(cur, lookup); | 1815 | XFS_BTREE_STATS_INC(cur, lookup); |
1816 | 1816 | ||
1817 | /* No such thing as a zero-level tree. */ | ||
1818 | if (cur->bc_nlevels == 0) | ||
1819 | return -EFSCORRUPTED; | ||
1820 | |||
1817 | block = NULL; | 1821 | block = NULL; |
1818 | keyno = 0; | 1822 | keyno = 0; |
1819 | 1823 | ||
@@ -4554,15 +4558,22 @@ xfs_btree_simple_query_range( | |||
4554 | if (error) | 4558 | if (error) |
4555 | goto out; | 4559 | goto out; |
4556 | 4560 | ||
4561 | /* Nothing? See if there's anything to the right. */ | ||
4562 | if (!stat) { | ||
4563 | error = xfs_btree_increment(cur, 0, &stat); | ||
4564 | if (error) | ||
4565 | goto out; | ||
4566 | } | ||
4567 | |||
4557 | while (stat) { | 4568 | while (stat) { |
4558 | /* Find the record. */ | 4569 | /* Find the record. */ |
4559 | error = xfs_btree_get_rec(cur, &recp, &stat); | 4570 | error = xfs_btree_get_rec(cur, &recp, &stat); |
4560 | if (error || !stat) | 4571 | if (error || !stat) |
4561 | break; | 4572 | break; |
4562 | cur->bc_ops->init_high_key_from_rec(&rec_key, recp); | ||
4563 | 4573 | ||
4564 | /* Skip if high_key(rec) < low_key. */ | 4574 | /* Skip if high_key(rec) < low_key. */ |
4565 | if (firstrec) { | 4575 | if (firstrec) { |
4576 | cur->bc_ops->init_high_key_from_rec(&rec_key, recp); | ||
4566 | firstrec = false; | 4577 | firstrec = false; |
4567 | diff = cur->bc_ops->diff_two_keys(cur, low_key, | 4578 | diff = cur->bc_ops->diff_two_keys(cur, low_key, |
4568 | &rec_key); | 4579 | &rec_key); |
@@ -4571,6 +4582,7 @@ xfs_btree_simple_query_range( | |||
4571 | } | 4582 | } |
4572 | 4583 | ||
4573 | /* Stop if high_key < low_key(rec). */ | 4584 | /* Stop if high_key < low_key(rec). */ |
4585 | cur->bc_ops->init_key_from_rec(&rec_key, recp); | ||
4574 | diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); | 4586 | diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); |
4575 | if (diff > 0) | 4587 | if (diff > 0) |
4576 | break; | 4588 | break; |
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 054a2032fdb3..c221d0ecd52e 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c | |||
@@ -194,7 +194,7 @@ xfs_defer_trans_abort( | |||
194 | /* Abort intent items. */ | 194 | /* Abort intent items. */ |
195 | list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { | 195 | list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { |
196 | trace_xfs_defer_pending_abort(tp->t_mountp, dfp); | 196 | trace_xfs_defer_pending_abort(tp->t_mountp, dfp); |
197 | if (dfp->dfp_committed) | 197 | if (!dfp->dfp_done) |
198 | dfp->dfp_type->abort_intent(dfp->dfp_intent); | 198 | dfp->dfp_type->abort_intent(dfp->dfp_intent); |
199 | } | 199 | } |
200 | 200 | ||
@@ -290,7 +290,6 @@ xfs_defer_finish( | |||
290 | struct xfs_defer_pending *dfp; | 290 | struct xfs_defer_pending *dfp; |
291 | struct list_head *li; | 291 | struct list_head *li; |
292 | struct list_head *n; | 292 | struct list_head *n; |
293 | void *done_item = NULL; | ||
294 | void *state; | 293 | void *state; |
295 | int error = 0; | 294 | int error = 0; |
296 | void (*cleanup_fn)(struct xfs_trans *, void *, int); | 295 | void (*cleanup_fn)(struct xfs_trans *, void *, int); |
@@ -309,19 +308,11 @@ xfs_defer_finish( | |||
309 | if (error) | 308 | if (error) |
310 | goto out; | 309 | goto out; |
311 | 310 | ||
312 | /* Mark all pending intents as committed. */ | ||
313 | list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) { | ||
314 | if (dfp->dfp_committed) | ||
315 | break; | ||
316 | trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp); | ||
317 | dfp->dfp_committed = true; | ||
318 | } | ||
319 | |||
320 | /* Log an intent-done item for the first pending item. */ | 311 | /* Log an intent-done item for the first pending item. */ |
321 | dfp = list_first_entry(&dop->dop_pending, | 312 | dfp = list_first_entry(&dop->dop_pending, |
322 | struct xfs_defer_pending, dfp_list); | 313 | struct xfs_defer_pending, dfp_list); |
323 | trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); | 314 | trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); |
324 | done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, | 315 | dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, |
325 | dfp->dfp_count); | 316 | dfp->dfp_count); |
326 | cleanup_fn = dfp->dfp_type->finish_cleanup; | 317 | cleanup_fn = dfp->dfp_type->finish_cleanup; |
327 | 318 | ||
@@ -331,7 +322,7 @@ xfs_defer_finish( | |||
331 | list_del(li); | 322 | list_del(li); |
332 | dfp->dfp_count--; | 323 | dfp->dfp_count--; |
333 | error = dfp->dfp_type->finish_item(*tp, dop, li, | 324 | error = dfp->dfp_type->finish_item(*tp, dop, li, |
334 | done_item, &state); | 325 | dfp->dfp_done, &state); |
335 | if (error) { | 326 | if (error) { |
336 | /* | 327 | /* |
337 | * Clean up after ourselves and jump out. | 328 | * Clean up after ourselves and jump out. |
@@ -428,8 +419,8 @@ xfs_defer_add( | |||
428 | dfp = kmem_alloc(sizeof(struct xfs_defer_pending), | 419 | dfp = kmem_alloc(sizeof(struct xfs_defer_pending), |
429 | KM_SLEEP | KM_NOFS); | 420 | KM_SLEEP | KM_NOFS); |
430 | dfp->dfp_type = defer_op_types[type]; | 421 | dfp->dfp_type = defer_op_types[type]; |
431 | dfp->dfp_committed = false; | ||
432 | dfp->dfp_intent = NULL; | 422 | dfp->dfp_intent = NULL; |
423 | dfp->dfp_done = NULL; | ||
433 | dfp->dfp_count = 0; | 424 | dfp->dfp_count = 0; |
434 | INIT_LIST_HEAD(&dfp->dfp_work); | 425 | INIT_LIST_HEAD(&dfp->dfp_work); |
435 | list_add_tail(&dfp->dfp_list, &dop->dop_intake); | 426 | list_add_tail(&dfp->dfp_list, &dop->dop_intake); |
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index cc3981c48296..e96533d178cf 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h | |||
@@ -30,8 +30,8 @@ struct xfs_defer_op_type; | |||
30 | struct xfs_defer_pending { | 30 | struct xfs_defer_pending { |
31 | const struct xfs_defer_op_type *dfp_type; /* function pointers */ | 31 | const struct xfs_defer_op_type *dfp_type; /* function pointers */ |
32 | struct list_head dfp_list; /* pending items */ | 32 | struct list_head dfp_list; /* pending items */ |
33 | bool dfp_committed; /* committed trans? */ | ||
34 | void *dfp_intent; /* log intent item */ | 33 | void *dfp_intent; /* log intent item */ |
34 | void *dfp_done; /* log done item */ | ||
35 | struct list_head dfp_work; /* work items */ | 35 | struct list_head dfp_work; /* work items */ |
36 | unsigned int dfp_count; /* # extent items */ | 36 | unsigned int dfp_count; /* # extent items */ |
37 | }; | 37 | }; |
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f814d42c73b2..270fb5cf4fa1 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h | |||
@@ -640,12 +640,15 @@ typedef struct xfs_agf { | |||
640 | __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ | 640 | __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ |
641 | uuid_t agf_uuid; /* uuid of filesystem */ | 641 | uuid_t agf_uuid; /* uuid of filesystem */ |
642 | 642 | ||
643 | __be32 agf_rmap_blocks; /* rmapbt blocks used */ | ||
644 | __be32 agf_padding; /* padding */ | ||
645 | |||
643 | /* | 646 | /* |
644 | * reserve some contiguous space for future logged fields before we add | 647 | * reserve some contiguous space for future logged fields before we add |
645 | * the unlogged fields. This makes the range logging via flags and | 648 | * the unlogged fields. This makes the range logging via flags and |
646 | * structure offsets much simpler. | 649 | * structure offsets much simpler. |
647 | */ | 650 | */ |
648 | __be64 agf_spare64[16]; | 651 | __be64 agf_spare64[15]; |
649 | 652 | ||
650 | /* unlogged fields, written during buffer writeback. */ | 653 | /* unlogged fields, written during buffer writeback. */ |
651 | __be64 agf_lsn; /* last write sequence */ | 654 | __be64 agf_lsn; /* last write sequence */ |
@@ -670,7 +673,9 @@ typedef struct xfs_agf { | |||
670 | #define XFS_AGF_LONGEST 0x00000400 | 673 | #define XFS_AGF_LONGEST 0x00000400 |
671 | #define XFS_AGF_BTREEBLKS 0x00000800 | 674 | #define XFS_AGF_BTREEBLKS 0x00000800 |
672 | #define XFS_AGF_UUID 0x00001000 | 675 | #define XFS_AGF_UUID 0x00001000 |
673 | #define XFS_AGF_NUM_BITS 13 | 676 | #define XFS_AGF_RMAP_BLOCKS 0x00002000 |
677 | #define XFS_AGF_SPARE64 0x00004000 | ||
678 | #define XFS_AGF_NUM_BITS 15 | ||
674 | #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) | 679 | #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) |
675 | 680 | ||
676 | #define XFS_AGF_FLAGS \ | 681 | #define XFS_AGF_FLAGS \ |
@@ -686,7 +691,9 @@ typedef struct xfs_agf { | |||
686 | { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ | 691 | { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ |
687 | { XFS_AGF_LONGEST, "LONGEST" }, \ | 692 | { XFS_AGF_LONGEST, "LONGEST" }, \ |
688 | { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ | 693 | { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ |
689 | { XFS_AGF_UUID, "UUID" } | 694 | { XFS_AGF_UUID, "UUID" }, \ |
695 | { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }, \ | ||
696 | { XFS_AGF_SPARE64, "SPARE64" } | ||
690 | 697 | ||
691 | /* disk block (xfs_daddr_t) in the AG */ | 698 | /* disk block (xfs_daddr_t) in the AG */ |
692 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | 699 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) |
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index bc1faebc84ec..17b8eeb34ac8 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c | |||
@@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block( | |||
98 | union xfs_btree_ptr *new, | 98 | union xfs_btree_ptr *new, |
99 | int *stat) | 99 | int *stat) |
100 | { | 100 | { |
101 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
102 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
101 | int error; | 103 | int error; |
102 | xfs_agblock_t bno; | 104 | xfs_agblock_t bno; |
103 | 105 | ||
@@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block( | |||
124 | 126 | ||
125 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | 127 | xfs_trans_agbtree_delta(cur->bc_tp, 1); |
126 | new->s = cpu_to_be32(bno); | 128 | new->s = cpu_to_be32(bno); |
129 | be32_add_cpu(&agf->agf_rmap_blocks, 1); | ||
130 | xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); | ||
127 | 131 | ||
128 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | 132 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); |
129 | *stat = 1; | 133 | *stat = 1; |
@@ -143,6 +147,8 @@ xfs_rmapbt_free_block( | |||
143 | bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); | 147 | bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); |
144 | trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, | 148 | trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, |
145 | bno, 1); | 149 | bno, 1); |
150 | be32_add_cpu(&agf->agf_rmap_blocks, -1); | ||
151 | xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); | ||
146 | error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); | 152 | error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); |
147 | if (error) | 153 | if (error) |
148 | return error; | 154 | return error; |
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 0e3d4f5ec33c..4aecc5fefe96 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -583,7 +583,8 @@ xfs_sb_verify( | |||
583 | * Only check the in progress field for the primary superblock as | 583 | * Only check the in progress field for the primary superblock as |
584 | * mkfs.xfs doesn't clear it from secondary superblocks. | 584 | * mkfs.xfs doesn't clear it from secondary superblocks. |
585 | */ | 585 | */ |
586 | return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR, | 586 | return xfs_mount_validate_sb(mp, &sb, |
587 | bp->b_maps[0].bm_bn == XFS_SB_DADDR, | ||
587 | check_version); | 588 | check_version); |
588 | } | 589 | } |
589 | 590 | ||
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47a318ce82e0..b5b9bffe3520 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -115,7 +115,6 @@ xfs_buf_ioacct_dec( | |||
115 | if (!(bp->b_flags & _XBF_IN_FLIGHT)) | 115 | if (!(bp->b_flags & _XBF_IN_FLIGHT)) |
116 | return; | 116 | return; |
117 | 117 | ||
118 | ASSERT(bp->b_flags & XBF_ASYNC); | ||
119 | bp->b_flags &= ~_XBF_IN_FLIGHT; | 118 | bp->b_flags &= ~_XBF_IN_FLIGHT; |
120 | percpu_counter_dec(&bp->b_target->bt_io_count); | 119 | percpu_counter_dec(&bp->b_target->bt_io_count); |
121 | } | 120 | } |
@@ -1612,7 +1611,7 @@ xfs_wait_buftarg( | |||
1612 | */ | 1611 | */ |
1613 | while (percpu_counter_sum(&btp->bt_io_count)) | 1612 | while (percpu_counter_sum(&btp->bt_io_count)) |
1614 | delay(100); | 1613 | delay(100); |
1615 | drain_workqueue(btp->bt_mount->m_buf_workqueue); | 1614 | flush_workqueue(btp->bt_mount->m_buf_workqueue); |
1616 | 1615 | ||
1617 | /* loop until there is nothing left on the lru list. */ | 1616 | /* loop until there is nothing left on the lru list. */ |
1618 | while (list_lru_count(&btp->bt_lru)) { | 1617 | while (list_lru_count(&btp->bt_lru)) { |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ed95e5bb04e6..e612a0233710 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -741,9 +741,20 @@ xfs_file_dax_write( | |||
741 | * page is inserted into the pagecache when we have to serve a write | 741 | * page is inserted into the pagecache when we have to serve a write |
742 | * fault on a hole. It should never be dirtied and can simply be | 742 | * fault on a hole. It should never be dirtied and can simply be |
743 | * dropped from the pagecache once we get real data for the page. | 743 | * dropped from the pagecache once we get real data for the page. |
744 | * | ||
745 | * XXX: This is racy against mmap, and there's nothing we can do about | ||
746 | * it. dax_do_io() should really do this invalidation internally as | ||
747 | * it will know if we've allocated over a hole for this specific IO and | ||
748 | * if so it needs to update the mapping tree and invalidate existing | ||
749 | * PTEs over the newly allocated range. Remove this invalidation when | ||
750 | * dax_do_io() is fixed up. | ||
744 | */ | 751 | */ |
745 | if (mapping->nrpages) { | 752 | if (mapping->nrpages) { |
746 | ret = invalidate_inode_pages2(mapping); | 753 | loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; |
754 | |||
755 | ret = invalidate_inode_pages2_range(mapping, | ||
756 | iocb->ki_pos >> PAGE_SHIFT, | ||
757 | end >> PAGE_SHIFT); | ||
747 | WARN_ON_ONCE(ret); | 758 | WARN_ON_ONCE(ret); |
748 | } | 759 | } |
749 | 760 | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0f96847b90e1..0b7f986745c1 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -248,6 +248,7 @@ xfs_growfs_data_private( | |||
248 | agf->agf_roots[XFS_BTNUM_RMAPi] = | 248 | agf->agf_roots[XFS_BTNUM_RMAPi] = |
249 | cpu_to_be32(XFS_RMAP_BLOCK(mp)); | 249 | cpu_to_be32(XFS_RMAP_BLOCK(mp)); |
250 | agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); | 250 | agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); |
251 | agf->agf_rmap_blocks = cpu_to_be32(1); | ||
251 | } | 252 | } |
252 | 253 | ||
253 | agf->agf_flfirst = cpu_to_be32(1); | 254 | agf->agf_flfirst = cpu_to_be32(1); |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2114d53df433..2af0dda1c978 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -715,12 +715,16 @@ xfs_iomap_write_allocate( | |||
715 | * is in the delayed allocation extent on which we sit | 715 | * is in the delayed allocation extent on which we sit |
716 | * but before our buffer starts. | 716 | * but before our buffer starts. |
717 | */ | 717 | */ |
718 | |||
719 | nimaps = 0; | 718 | nimaps = 0; |
720 | while (nimaps == 0) { | 719 | while (nimaps == 0) { |
721 | nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); | 720 | nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); |
722 | 721 | /* | |
723 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres, | 722 | * We have already reserved space for the extent and any |
723 | * indirect blocks when creating the delalloc extent, | ||
724 | * there is no need to reserve space in this transaction | ||
725 | * again. | ||
726 | */ | ||
727 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, | ||
724 | 0, XFS_TRANS_RESERVE, &tp); | 728 | 0, XFS_TRANS_RESERVE, &tp); |
725 | if (error) | 729 | if (error) |
726 | return error; | 730 | return error; |
@@ -1037,20 +1041,14 @@ xfs_file_iomap_begin( | |||
1037 | return error; | 1041 | return error; |
1038 | 1042 | ||
1039 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); | 1043 | trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); |
1040 | xfs_bmbt_to_iomap(ip, iomap, &imap); | ||
1041 | } else if (nimaps) { | ||
1042 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1043 | trace_xfs_iomap_found(ip, offset, length, 0, &imap); | ||
1044 | xfs_bmbt_to_iomap(ip, iomap, &imap); | ||
1045 | } else { | 1044 | } else { |
1045 | ASSERT(nimaps); | ||
1046 | |||
1046 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1047 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1047 | trace_xfs_iomap_not_found(ip, offset, length, 0, &imap); | 1048 | trace_xfs_iomap_found(ip, offset, length, 0, &imap); |
1048 | iomap->blkno = IOMAP_NULL_BLOCK; | ||
1049 | iomap->type = IOMAP_HOLE; | ||
1050 | iomap->offset = offset; | ||
1051 | iomap->length = length; | ||
1052 | } | 1049 | } |
1053 | 1050 | ||
1051 | xfs_bmbt_to_iomap(ip, iomap, &imap); | ||
1054 | return 0; | 1052 | return 0; |
1055 | } | 1053 | } |
1056 | 1054 | ||
@@ -1112,3 +1110,48 @@ struct iomap_ops xfs_iomap_ops = { | |||
1112 | .iomap_begin = xfs_file_iomap_begin, | 1110 | .iomap_begin = xfs_file_iomap_begin, |
1113 | .iomap_end = xfs_file_iomap_end, | 1111 | .iomap_end = xfs_file_iomap_end, |
1114 | }; | 1112 | }; |
1113 | |||
1114 | static int | ||
1115 | xfs_xattr_iomap_begin( | ||
1116 | struct inode *inode, | ||
1117 | loff_t offset, | ||
1118 | loff_t length, | ||
1119 | unsigned flags, | ||
1120 | struct iomap *iomap) | ||
1121 | { | ||
1122 | struct xfs_inode *ip = XFS_I(inode); | ||
1123 | struct xfs_mount *mp = ip->i_mount; | ||
1124 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
1125 | xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); | ||
1126 | struct xfs_bmbt_irec imap; | ||
1127 | int nimaps = 1, error = 0; | ||
1128 | unsigned lockmode; | ||
1129 | |||
1130 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1131 | return -EIO; | ||
1132 | |||
1133 | lockmode = xfs_ilock_data_map_shared(ip); | ||
1134 | |||
1135 | /* if there is no attribute fork or no extents, return ENOENT */ | ||
1136 | if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { | ||
1137 | error = -ENOENT; | ||
1138 | goto out_unlock; | ||
1139 | } | ||
1140 | |||
1141 | ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); | ||
1142 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, | ||
1143 | &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); | ||
1144 | out_unlock: | ||
1145 | xfs_iunlock(ip, lockmode); | ||
1146 | |||
1147 | if (!error) { | ||
1148 | ASSERT(nimaps); | ||
1149 | xfs_bmbt_to_iomap(ip, iomap, &imap); | ||
1150 | } | ||
1151 | |||
1152 | return error; | ||
1153 | } | ||
1154 | |||
1155 | struct iomap_ops xfs_xattr_iomap_ops = { | ||
1156 | .iomap_begin = xfs_xattr_iomap_begin, | ||
1157 | }; | ||
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index e066d045e2ff..fb8aca3d69ab 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, | |||
35 | struct xfs_bmbt_irec *); | 35 | struct xfs_bmbt_irec *); |
36 | 36 | ||
37 | extern struct iomap_ops xfs_iomap_ops; | 37 | extern struct iomap_ops xfs_iomap_ops; |
38 | extern struct iomap_ops xfs_xattr_iomap_ops; | ||
38 | 39 | ||
39 | #endif /* __XFS_IOMAP_H__*/ | 40 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ab820f84ed50..b24c3102fa93 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -1009,7 +1009,14 @@ xfs_vn_fiemap( | |||
1009 | int error; | 1009 | int error; |
1010 | 1010 | ||
1011 | xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); | 1011 | xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); |
1012 | error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops); | 1012 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { |
1013 | fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; | ||
1014 | error = iomap_fiemap(inode, fieinfo, start, length, | ||
1015 | &xfs_xattr_iomap_ops); | ||
1016 | } else { | ||
1017 | error = iomap_fiemap(inode, fieinfo, start, length, | ||
1018 | &xfs_iomap_ops); | ||
1019 | } | ||
1013 | xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); | 1020 | xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); |
1014 | 1021 | ||
1015 | return error; | 1022 | return error; |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 24ef83ef04de..fd6be45b3a1e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1574,9 +1574,16 @@ xfs_fs_fill_super( | |||
1574 | } | 1574 | } |
1575 | } | 1575 | } |
1576 | 1576 | ||
1577 | if (xfs_sb_version_hasrmapbt(&mp->m_sb)) | 1577 | if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { |
1578 | if (mp->m_sb.sb_rblocks) { | ||
1579 | xfs_alert(mp, | ||
1580 | "EXPERIMENTAL reverse mapping btree not compatible with realtime device!"); | ||
1581 | error = -EINVAL; | ||
1582 | goto out_filestream_unmount; | ||
1583 | } | ||
1578 | xfs_alert(mp, | 1584 | xfs_alert(mp, |
1579 | "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); | 1585 | "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); |
1586 | } | ||
1580 | 1587 | ||
1581 | error = xfs_mountfs(mp); | 1588 | error = xfs_mountfs(mp); |
1582 | if (error) | 1589 | if (error) |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 551b7e26980c..d303a665dba9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | |||
1298 | DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); | 1298 | DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); |
1299 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); | 1299 | DEFINE_IOMAP_EVENT(xfs_iomap_alloc); |
1300 | DEFINE_IOMAP_EVENT(xfs_iomap_found); | 1300 | DEFINE_IOMAP_EVENT(xfs_iomap_found); |
1301 | DEFINE_IOMAP_EVENT(xfs_iomap_not_found); | ||
1302 | 1301 | ||
1303 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 1302 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
1304 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 1303 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -2296,7 +2295,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, | |||
2296 | __entry->dev = mp ? mp->m_super->s_dev : 0; | 2295 | __entry->dev = mp ? mp->m_super->s_dev : 0; |
2297 | __entry->type = dfp->dfp_type->type; | 2296 | __entry->type = dfp->dfp_type->type; |
2298 | __entry->intent = dfp->dfp_intent; | 2297 | __entry->intent = dfp->dfp_intent; |
2299 | __entry->committed = dfp->dfp_committed; | 2298 | __entry->committed = dfp->dfp_done != NULL; |
2300 | __entry->nr = dfp->dfp_count; | 2299 | __entry->nr = dfp->dfp_count; |
2301 | ), | 2300 | ), |
2302 | TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", | 2301 | TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", |