Diffstat (limited to 'fs')
142 files changed, 4953 insertions, 3071 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 370b24cee4d8..c055d56ec63d 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,6 +30,9 @@ config COMPAT_BINFMT_ELF
 config ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	bool
 
+config ARCH_BINFMT_ELF_STATE
+	bool
+
 config BINFMT_ELF_FDPIC
 	bool "Kernel support for FDPIC ELF binaries"
 	default y
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a6175fe10c0..02b16910f4c9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -386,6 +386,127 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 		ELF_PAGESTART(cmds[first_idx].p_vaddr);
 }
 
+/**
+ * load_elf_phdrs() - load ELF program headers
+ * @elf_ex:   ELF header of the binary whose program headers should be loaded
+ * @elf_file: the opened ELF binary file
+ *
+ * Loads ELF program headers from the binary file elf_file, which has the ELF
+ * header pointed to by elf_ex, into a newly allocated array. The caller is
+ * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
+ */
+static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
+				       struct file *elf_file)
+{
+	struct elf_phdr *elf_phdata = NULL;
+	int retval, size, err = -1;
+
+	/*
+	 * If the size of this structure has changed, then punt, since
+	 * we will be doing the wrong thing.
+	 */
+	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
+		goto out;
+
+	/* Sanity check the number of program headers... */
+	if (elf_ex->e_phnum < 1 ||
+		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
+		goto out;
+
+	/* ...and their total size. */
+	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
+	if (size > ELF_MIN_ALIGN)
+		goto out;
+
+	elf_phdata = kmalloc(size, GFP_KERNEL);
+	if (!elf_phdata)
+		goto out;
+
+	/* Read in the program headers */
+	retval = kernel_read(elf_file, elf_ex->e_phoff,
+			     (char *)elf_phdata, size);
+	if (retval != size) {
+		err = (retval < 0) ? retval : -EIO;
+		goto out;
+	}
+
+	/* Success! */
+	err = 0;
+out:
+	if (err) {
+		kfree(elf_phdata);
+		elf_phdata = NULL;
+	}
+	return elf_phdata;
+}
+
+#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
+
+/**
+ * struct arch_elf_state - arch-specific ELF loading state
+ *
+ * This structure is used to preserve architecture specific data during
+ * the loading of an ELF file, throughout the checking of architecture
+ * specific ELF headers & through to the point where the ELF load is
+ * known to be proceeding (ie. SET_PERSONALITY).
+ *
+ * This implementation is a dummy for architectures which require no
+ * specific state.
+ */
+struct arch_elf_state {
+};
+
+#define INIT_ARCH_ELF_STATE {}
+
+/**
+ * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
+ * @ehdr:	The main ELF header
+ * @phdr:	The program header to check
+ * @elf:	The open ELF file
+ * @is_interp:	True if the phdr is from the interpreter of the ELF being
+ *		loaded, else false.
+ * @state:	Architecture-specific state preserved throughout the process
+ *		of loading the ELF.
+ *
+ * Inspects the program header phdr to validate its correctness and/or
+ * suitability for the system. Called once per ELF program header in the
+ * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
+ * interpreter.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ *         with that return code.
+ */
+static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
+				   struct elf_phdr *phdr,
+				   struct file *elf, bool is_interp,
+				   struct arch_elf_state *state)
+{
+	/* Dummy implementation, always proceed */
+	return 0;
+}
+
+/**
+ * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
+ * @ehdr:	The main ELF header
+ * @has_interp:	True if the ELF has an interpreter, else false.
+ * @state:	Architecture-specific state preserved throughout the process
+ *		of loading the ELF.
+ *
+ * Provides a final opportunity for architecture code to reject the loading
+ * of the ELF & cause an exec syscall to return an error. This is called after
+ * all program headers to be checked by arch_elf_pt_proc have been.
+ *
+ * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
+ *         with that return code.
+ */
+static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+				 struct arch_elf_state *state)
+{
+	/* Dummy implementation, always proceed */
+	return 0;
+}
+
+#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
 
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
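The dummy definitions above are only compiled when an architecture does not select CONFIG_ARCH_BINFMT_ELF_STATE. For illustration only, a minimal sketch of what an architecture-side override could look like; the struct field, flag values, and error choice below are hypothetical and are not part of this patch (the real per-arch code lives under arch/ and must also provide a matching SET_PERSONALITY2()):

/* Hypothetical <asm/elf.h> fragment; illustrative only, not from this patch. */
struct arch_elf_state {
	int abi_flags;				/* assumed field: data decoded from PT_LOPROC headers */
};

#define INIT_ARCH_ELF_STATE { .abi_flags = 0, }

static inline int arch_elf_pt_proc(struct elfhdr *ehdr, struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Reject obviously malformed arch-specific headers; any non-zero
	 * return value fails the exec with that error code. */
	if (phdr->p_filesz < sizeof(u32))
		return -EINVAL;
	/* Remember whether the header came from the binary or its interpreter. */
	state->abi_flags |= is_interp ? 2 : 1;
	return 0;
}

static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Final veto point, after every PT_LOPROC..PT_HIPROC header was seen:
	 * here, require the binary and interpreter requests to be compatible. */
	if (has_interp && state->abi_flags == 2)
		return -ELIBBAD;
	return 0;
}

The point of the design is that the state collected per header survives until SET_PERSONALITY2(), so the architecture can make a decision based on the whole binary plus its interpreter rather than on one header at a time.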
@@ -394,16 +515,15 @@ static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		struct file *interpreter, unsigned long *interp_map_addr,
-		unsigned long no_base)
+		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
 {
-	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
 	unsigned long load_addr = 0;
 	int load_addr_set = 0;
 	unsigned long last_bss = 0, elf_bss = 0;
 	unsigned long error = ~0UL;
 	unsigned long total_size;
-	int retval, i, size;
+	int i;
 
 	/* First of all, some simple consistency checks */
 	if (interp_elf_ex->e_type != ET_EXEC &&
@@ -414,40 +534,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	if (!interpreter->f_op->mmap)
 		goto out;
 
-	/*
-	 * If the size of this structure has changed, then punt, since
-	 * we will be doing the wrong thing.
-	 */
-	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
-		goto out;
-	if (interp_elf_ex->e_phnum < 1 ||
-		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
-		goto out;
-
-	/* Now read in all of the header information */
-	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
-	if (size > ELF_MIN_ALIGN)
-		goto out;
-	elf_phdata = kmalloc(size, GFP_KERNEL);
-	if (!elf_phdata)
-		goto out;
-
-	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
-			     (char *)elf_phdata, size);
-	error = -EIO;
-	if (retval != size) {
-		if (retval < 0)
-			error = retval;
-		goto out_close;
-	}
-
-	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+	total_size = total_mapping_size(interp_elf_phdata,
+					interp_elf_ex->e_phnum);
 	if (!total_size) {
 		error = -EINVAL;
-		goto out_close;
+		goto out;
 	}
 
-	eppnt = elf_phdata;
+	eppnt = interp_elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
@@ -474,7 +568,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
-				goto out_close;
+				goto out;
 
 			if (!load_addr_set &&
 			    interp_elf_ex->e_type == ET_DYN) {
@@ -493,7 +587,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			    eppnt->p_memsz > TASK_SIZE ||
 			    TASK_SIZE - eppnt->p_memsz < k) {
 				error = -ENOMEM;
-				goto out_close;
+				goto out;
 			}
 
 			/*
@@ -523,7 +617,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	 */
 	if (padzero(elf_bss)) {
 		error = -EFAULT;
-		goto out_close;
+		goto out;
 	}
 
 	/* What we have mapped so far */
@@ -532,13 +626,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	/* Map the last of the bss segment */
 	error = vm_brk(elf_bss, last_bss - elf_bss);
 	if (BAD_ADDR(error))
-		goto out_close;
+		goto out;
 	}
 
 	error = load_addr;
-
-out_close:
-	kfree(elf_phdata);
 out:
 	return error;
 }
@@ -575,10 +666,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	int load_addr_set = 0;
 	char * elf_interpreter = NULL;
 	unsigned long error;
-	struct elf_phdr *elf_ppnt, *elf_phdata;
+	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
 	unsigned long elf_bss, elf_brk;
 	int retval, i;
-	unsigned int size;
 	unsigned long elf_entry;
 	unsigned long interp_load_addr = 0;
 	unsigned long start_code, end_code, start_data, end_data;
@@ -589,6 +679,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
 	} *loc;
+	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
 
 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 	if (!loc) {
@@ -611,26 +702,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	if (!bprm->file->f_op->mmap)
 		goto out;
 
-	/* Now read in all of the header information */
-	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
-		goto out;
-	if (loc->elf_ex.e_phnum < 1 ||
-		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
-		goto out;
-	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
-	retval = -ENOMEM;
-	elf_phdata = kmalloc(size, GFP_KERNEL);
+	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
 	if (!elf_phdata)
 		goto out;
 
-	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
-			     (char *)elf_phdata, size);
-	if (retval != size) {
-		if (retval >= 0)
-			retval = -EIO;
-		goto out_free_ph;
-	}
-
 	elf_ppnt = elf_phdata;
 	elf_bss = 0;
 	elf_brk = 0;
@@ -699,12 +774,21 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 	elf_ppnt = elf_phdata;
 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
-		if (elf_ppnt->p_type == PT_GNU_STACK) {
+		switch (elf_ppnt->p_type) {
+		case PT_GNU_STACK:
 			if (elf_ppnt->p_flags & PF_X)
 				executable_stack = EXSTACK_ENABLE_X;
 			else
 				executable_stack = EXSTACK_DISABLE_X;
 			break;
+
+		case PT_LOPROC ... PT_HIPROC:
+			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
+						  bprm->file, false,
+						  &arch_state);
+			if (retval)
+				goto out_free_dentry;
+			break;
 		}
 
 	/* Some simple consistency checks for the interpreter */
@@ -716,8 +800,36 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
+
+		/* Load the interpreter program headers */
+		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
+						   interpreter);
+		if (!interp_elf_phdata)
+			goto out_free_dentry;
+
+		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
+		elf_ppnt = interp_elf_phdata;
+		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
+			switch (elf_ppnt->p_type) {
+			case PT_LOPROC ... PT_HIPROC:
+				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
+							  elf_ppnt, interpreter,
+							  true, &arch_state);
+				if (retval)
+					goto out_free_dentry;
+				break;
+			}
 	}
 
+	/*
+	 * Allow arch code to reject the ELF at this point, whilst it's
+	 * still possible to return an error to the code that invoked
+	 * the exec syscall.
+	 */
+	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+	if (retval)
+		goto out_free_dentry;
+
 	/* Flush all traces of the currently running executable */
 	retval = flush_old_exec(bprm);
 	if (retval)
@@ -725,7 +837,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality.  */
-	SET_PERSONALITY(loc->elf_ex);
+	SET_PERSONALITY2(loc->elf_ex, &arch_state);
 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 
@@ -890,7 +1002,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
 						    interpreter,
 						    &interp_map_addr,
-						    load_bias);
+						    load_bias, interp_elf_phdata);
 			if (!IS_ERR((void *)elf_entry)) {
 				/*
 				 * load_elf_interp() returns relocation
@@ -917,6 +1029,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		}
 	}
 
+	kfree(interp_elf_phdata);
 	kfree(elf_phdata);
 
 	set_binfmt(&elf_format);
@@ -981,6 +1094,7 @@ out_ret:
 
 	/* error cleanup */
 out_free_dentry:
+	kfree(interp_elf_phdata);
 	allow_write_access(interpreter);
 	if (interpreter)
 		fput(interpreter);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index cb7f3fe9c9f6..d897ef803b3b 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -94,6 +94,7 @@
 #include <linux/mutex.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
+#include <linux/vmalloc.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "hash.h"
@@ -326,9 +327,6 @@ static int btrfsic_handle_extent_data(struct btrfsic_state *state,
 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num);
-static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
-				  u32 len, struct block_device *bdev,
-				  struct btrfsic_block_data_ctx *block_ctx_out);
 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
 static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx);
@@ -1326,24 +1324,25 @@ static int btrfsic_create_link_to_next_block(
 		l = NULL;
 		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
 	} else {
-		if (next_block->logical_bytenr != next_bytenr &&
-		    !(!next_block->is_metadata &&
-		      0 == next_block->logical_bytenr)) {
-			printk(KERN_INFO
-			       "Referenced block @%llu (%s/%llu/%d)"
-			       " found in hash table, %c,"
-			       " bytenr mismatch (!= stored %llu).\n",
-			       next_bytenr, next_block_ctx->dev->name,
-			       next_block_ctx->dev_bytenr, *mirror_nump,
-			       btrfsic_get_block_type(state, next_block),
-			       next_block->logical_bytenr);
-		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-			printk(KERN_INFO
-			       "Referenced block @%llu (%s/%llu/%d)"
-			       " found in hash table, %c.\n",
-			       next_bytenr, next_block_ctx->dev->name,
-			       next_block_ctx->dev_bytenr, *mirror_nump,
-			       btrfsic_get_block_type(state, next_block));
+		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
+			if (next_block->logical_bytenr != next_bytenr &&
+			    !(!next_block->is_metadata &&
+			      0 == next_block->logical_bytenr))
+				printk(KERN_INFO
+				       "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
+				       next_bytenr, next_block_ctx->dev->name,
+				       next_block_ctx->dev_bytenr, *mirror_nump,
+				       btrfsic_get_block_type(state,
+							      next_block),
+				       next_block->logical_bytenr);
+			else
+				printk(KERN_INFO
+				       "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
+				       next_bytenr, next_block_ctx->dev->name,
+				       next_block_ctx->dev_bytenr, *mirror_nump,
+				       btrfsic_get_block_type(state,
+							      next_block));
+		}
 		next_block->logical_bytenr = next_bytenr;
 
 		next_block->mirror_num = *mirror_nump;
@@ -1529,7 +1528,9 @@ static int btrfsic_handle_extent_data(
 			return -1;
 		}
 		if (!block_was_created) {
-			if (next_block->logical_bytenr != next_bytenr &&
+			if ((state->print_mask &
+			     BTRFSIC_PRINT_MASK_VERBOSE) &&
+			    next_block->logical_bytenr != next_bytenr &&
 			    !(!next_block->is_metadata &&
 			      0 == next_block->logical_bytenr)) {
 				printk(KERN_INFO
@@ -1607,25 +1608,6 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
 	return ret;
 }
 
-static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
-				  u32 len, struct block_device *bdev,
-				  struct btrfsic_block_data_ctx *block_ctx_out)
-{
-	block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
-	block_ctx_out->dev_bytenr = bytenr;
-	block_ctx_out->start = bytenr;
-	block_ctx_out->len = len;
-	block_ctx_out->datav = NULL;
-	block_ctx_out->pagev = NULL;
-	block_ctx_out->mem_to_free = NULL;
-	if (NULL != block_ctx_out->dev) {
-		return 0;
-	} else {
-		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
-		return -ENXIO;
-	}
-}
-
 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
 {
 	if (block_ctx->mem_to_free) {
@@ -1901,25 +1883,26 @@ again:
					       dev_state,
					       dev_bytenr);
			}
-			if (block->logical_bytenr != bytenr &&
-			    !(!block->is_metadata &&
-			      block->logical_bytenr == 0))
-				printk(KERN_INFO
-				       "Written block @%llu (%s/%llu/%d)"
-				       " found in hash table, %c,"
-				       " bytenr mismatch"
-				       " (!= stored %llu).\n",
-				       bytenr, dev_state->name, dev_bytenr,
-				       block->mirror_num,
-				       btrfsic_get_block_type(state, block),
-				       block->logical_bytenr);
-			else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-				printk(KERN_INFO
-				       "Written block @%llu (%s/%llu/%d)"
-				       " found in hash table, %c.\n",
-				       bytenr, dev_state->name, dev_bytenr,
-				       block->mirror_num,
-				       btrfsic_get_block_type(state, block));
+			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
+				if (block->logical_bytenr != bytenr &&
+				    !(!block->is_metadata &&
+				      block->logical_bytenr == 0))
+					printk(KERN_INFO
+					       "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
+					       bytenr, dev_state->name,
+					       dev_bytenr,
+					       block->mirror_num,
+					       btrfsic_get_block_type(state,
+								      block),
+					       block->logical_bytenr);
+				else
+					printk(KERN_INFO
+					       "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
+					       bytenr, dev_state->name,
+					       dev_bytenr, block->mirror_num,
+					       btrfsic_get_block_type(state,
+								      block));
+			}
 			block->logical_bytenr = bytenr;
 		} else {
 			if (num_pages * PAGE_CACHE_SIZE <
@@ -2002,24 +1985,13 @@ again:
			}
		}
 
-		if (block->is_superblock)
-			ret = btrfsic_map_superblock(state, bytenr,
-						     processed_len,
-						     bdev, &block_ctx);
-		else
-			ret = btrfsic_map_block(state, bytenr, processed_len,
-						&block_ctx, 0);
-		if (ret) {
-			printk(KERN_INFO
-			       "btrfsic: btrfsic_map_block(root @%llu)"
-			       " failed!\n", bytenr);
-			goto continue_loop;
-		}
-		block_ctx.datav = mapped_datav;
-		/* the following is required in case of writes to mirrors,
-		 * use the same that was used for the lookup */
 		block_ctx.dev = dev_state;
 		block_ctx.dev_bytenr = dev_bytenr;
+		block_ctx.start = bytenr;
+		block_ctx.len = processed_len;
+		block_ctx.pagev = NULL;
+		block_ctx.mem_to_free = NULL;
+		block_ctx.datav = mapped_datav;
 
 		if (is_metadata || state->include_extent_data) {
			block->never_written = 0;
@@ -2133,10 +2105,6 @@ again:
			/* this is getting ugly for the
			 * include_extent_data case... */
			bytenr = 0;	/* unknown */
-			block_ctx.start = bytenr;
-			block_ctx.len = processed_len;
-			block_ctx.mem_to_free = NULL;
-			block_ctx.pagev = NULL;
		} else {
			processed_len = state->metablock_size;
			bytenr = btrfs_stack_header_bytenr(
@@ -2149,22 +2117,15 @@ again:
				       "Written block @%llu (%s/%llu/?)"
				       " !found in hash table, M.\n",
				       bytenr, dev_state->name, dev_bytenr);
-
-			ret = btrfsic_map_block(state, bytenr, processed_len,
-						&block_ctx, 0);
-			if (ret) {
-				printk(KERN_INFO
-				       "btrfsic: btrfsic_map_block(root @%llu)"
-				       " failed!\n",
-				       dev_bytenr);
-				goto continue_loop;
-			}
		}
-		block_ctx.datav = mapped_datav;
-		/* the following is required in case of writes to mirrors,
-		 * use the same that was used for the lookup */
+
 		block_ctx.dev = dev_state;
 		block_ctx.dev_bytenr = dev_bytenr;
+		block_ctx.start = bytenr;
+		block_ctx.len = processed_len;
+		block_ctx.pagev = NULL;
+		block_ctx.mem_to_free = NULL;
+		block_ctx.datav = mapped_datav;
 
 		block = btrfsic_block_alloc();
 		if (NULL == block) {
@@ -3130,10 +3091,13 @@ int btrfsic_mount(struct btrfs_root *root,
		       root->sectorsize, PAGE_CACHE_SIZE);
		return -1;
	}
-	state = kzalloc(sizeof(*state), GFP_NOFS);
-	if (NULL == state) {
-		printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
-		return -1;
+	state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!state) {
+		state = vzalloc(sizeof(*state));
+		if (!state) {
+			printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
+			return -1;
+		}
	}
 
	if (!btrfsic_is_initialized) {
@@ -3277,5 +3241,8 @@ void btrfsic_unmount(struct btrfs_root *root,
 
	mutex_unlock(&btrfsic_mutex);
 
-	kfree(state);
+	if (is_vmalloc_addr(state))
+		vfree(state);
+	else
+		kfree(state);
 }
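The allocation change in btrfsic_mount() is the classic "try kmalloc, fall back to vmalloc" pattern for a structure (struct btrfsic_state) that may be too large for a physically contiguous allocation, with __GFP_NOWARN suppressing the failure warning and is_vmalloc_addr() picking the matching free path at unmount. A minimal generic sketch of the same idiom, with made-up helper names (later kernels provide kvfree() for the free side):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Illustrative helpers, not part of this patch. */
static void *big_state_alloc(size_t size)
{
	/* Prefer physically contiguous memory, but do not warn on failure. */
	void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);

	if (!p)
		p = vzalloc(size);	/* virtually contiguous fallback */
	return p;
}

static void big_state_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}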
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index dcd9be32ac57..e9df8862012c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -224,16 +224,19 @@ out:
  * Clear the writeback bits on all of the file
  * pages for a compressed write
  */
-static noinline void end_compressed_writeback(struct inode *inode, u64 start,
-					      unsigned long ram_size)
+static noinline void end_compressed_writeback(struct inode *inode,
+					      const struct compressed_bio *cb)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = cb->start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT;
 	struct page *pages[16];
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
 	int ret;
 
+	if (cb->errors)
+		mapping_set_error(inode->i_mapping, -EIO);
+
 	while (nr_pages > 0) {
 		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
@@ -244,6 +247,8 @@ static noinline void end_compressed_writeback(struct inode *inode, u64 start,
			continue;
		}
		for (i = 0; i < ret; i++) {
+			if (cb->errors)
+				SetPageError(pages[i]);
			end_page_writeback(pages[i]);
			page_cache_release(pages[i]);
		}
@@ -287,10 +292,11 @@ static void end_compressed_bio_write(struct bio *bio, int err)
		tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
						 cb->start,
						 cb->start + cb->len - 1,
-						 NULL, 1);
+						 NULL,
+						 err ? 0 : 1);
	cb->compressed_pages[0]->mapping = NULL;
 
-	end_compressed_writeback(inode, cb->start, cb->len);
+	end_compressed_writeback(inode, cb);
	/* note, our inode could be gone now */
 
	/*
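The change above makes a failed compressed write visible to user space: the bio completion now passes the error down, mapping_set_error() records AS_EIO on the inode's mapping (so a later fsync()/msync() returns an error), and each page is flagged with SetPageError(). A reduced sketch of the reporting side of such a completion handler, with a hypothetical function name:

/* Illustrative only: propagate a failed write into the mapping and page. */
static void demo_end_write_page(struct address_space *mapping,
				struct page *page, int err)
{
	if (err) {
		mapping_set_error(mapping, -EIO);	/* remembered until fsync() checks it */
		SetPageError(page);
	}
	end_page_writeback(page);			/* wake anyone waiting on writeback */
}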
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 150822ee0a0b..14a72ed14ef7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2929,7 +2929,7 @@ done:
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
-	if (ret < 0)
+	if (ret < 0 && !p->skip_release_on_error)
		btrfs_release_path(p);
	return ret;
 }
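With the new skip_release_on_error path flag (added to struct btrfs_path in ctree.h below), a failed btrfs_search_slot() leaves the path populated so the caller can still inspect the leaf and slot that were reached, at the cost of having to release the path itself. A hedged sketch of a caller, with a hypothetical function name and trimmed error handling:

/* Illustrative caller; not taken from this patch. */
static int demo_probe(struct btrfs_root *root, struct btrfs_key *key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/*
	 * Keep the path populated even if the search fails, so the leaf
	 * and slot that were reached can still be examined afterwards.
	 */
	path->skip_release_on_error = 1;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		goto out;	/* note: the path was NOT released for us */

	/* ... examine path->nodes[0] / path->slots[0] here ... */
out:
	btrfs_free_path(path);	/* the caller always releases the path now */
	return ret;
}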
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe69edda11fb..e6fbbd74b716 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -607,6 +607,7 @@ struct btrfs_path {
	unsigned int leave_spinning:1;
	unsigned int search_commit_root:1;
	unsigned int need_commit_sem:1;
+	unsigned int skip_release_on_error:1;
 };
 
 /*
@@ -1170,6 +1171,7 @@ struct btrfs_space_info {
	struct percpu_counter total_bytes_pinned;
 
	struct list_head list;
+	struct list_head ro_bgs;
 
	struct rw_semaphore groups_sem;
	/* for block groups in our same type */
@@ -1276,6 +1278,8 @@ struct btrfs_block_group_cache {
	unsigned int ro:1;
	unsigned int dirty:1;
	unsigned int iref:1;
+	unsigned int has_caching_ctl:1;
+	unsigned int removed:1;
 
	int disk_cache_state;
 
@@ -1305,6 +1309,11 @@ struct btrfs_block_group_cache {
 
	/* For delayed block group creation or deletion of empty block groups */
	struct list_head bg_list;
+
+	/* For read-only block groups */
+	struct list_head ro_list;
+
+	atomic_t trimming;
 };
 
 /* delayed seq elem */
@@ -1402,6 +1411,11 @@ struct btrfs_fs_info {
	 */
	u64 last_trans_log_full_commit;
	unsigned long mount_opt;
+	/*
+	 * Track requests for actions that need to be done during transaction
+	 * commit (like for some mount options).
+	 */
+	unsigned long pending_changes;
	unsigned long compress_type:4;
	int commit_interval;
	/*
@@ -1729,6 +1743,12 @@ struct btrfs_fs_info {
 
	/* For btrfs to record security options */
	struct security_mnt_opts security_opts;
+
+	/*
+	 * Chunks that can't be freed yet (under a trim/discard operation)
+	 * and will be latter freed. Protected by fs_info->chunk_mutex.
+	 */
+	struct list_head pinned_chunks;
 };
 
 struct btrfs_subvolume_writers {
@@ -2093,7 +2113,6 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE	(1 << 23)
-#define BTRFS_MOUNT_CHANGE_INODE_CACHE	(1 << 24)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(8192)
@@ -2103,6 +2122,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt)
 #define btrfs_test_opt(root, opt)	((root)->fs_info->mount_opt & \
					 BTRFS_MOUNT_##opt)
+
 #define btrfs_set_and_info(root, opt, fmt, args...)	\
 {							\
	if (!btrfs_test_opt(root, opt))			\
@@ -2118,6 +2138,49 @@ struct btrfs_ioctl_defrag_range_args {
 }
 
 /*
+ * Requests for changes that need to be done during transaction commit.
+ *
+ * Internal mount options that are used for special handling of the real
+ * mount options (eg. cannot be set during remount and have to be set during
+ * transaction commit)
+ */
+
+#define BTRFS_PENDING_SET_INODE_MAP_CACHE	(0)
+#define BTRFS_PENDING_CLEAR_INODE_MAP_CACHE	(1)
+#define BTRFS_PENDING_COMMIT			(2)
+
+#define btrfs_test_pending(info, opt)	\
+	test_bit(BTRFS_PENDING_##opt, &(info)->pending_changes)
+#define btrfs_set_pending(info, opt)	\
+	set_bit(BTRFS_PENDING_##opt, &(info)->pending_changes)
+#define btrfs_clear_pending(info, opt)	\
+	clear_bit(BTRFS_PENDING_##opt, &(info)->pending_changes)
+
+/*
+ * Helpers for setting pending mount option changes.
+ *
+ * Expects corresponding macros
+ * BTRFS_PENDING_SET_ and CLEAR_ + short mount option name
+ */
+#define btrfs_set_pending_and_info(info, opt, fmt, args...)		\
+do {									\
+	if (!btrfs_raw_test_opt((info)->mount_opt, opt)) {		\
+		btrfs_info((info), fmt, ##args);			\
+		btrfs_set_pending((info), SET_##opt);			\
+		btrfs_clear_pending((info), CLEAR_##opt);		\
+	}								\
+} while(0)
+
+#define btrfs_clear_pending_and_info(info, opt, fmt, args...)		\
+do {									\
+	if (btrfs_raw_test_opt((info)->mount_opt, opt)) {		\
+		btrfs_info((info), fmt, ##args);			\
+		btrfs_set_pending((info), CLEAR_##opt);			\
+		btrfs_clear_pending((info), SET_##opt);			\
+	}								\
+} while(0)
+
+/*
  * Inode flags
  */
 #define BTRFS_INODE_NODATASUM		(1 << 0)
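The SET_/CLEAR_ pending bits let a remount record a request (for example for the inode map cache) that only takes effect when the running transaction commits, instead of flipping the mount option immediately. A sketch of how the two sides fit together; the function names below are illustrative, the real consumers are the mount-option parser and btrfs_apply_pending_changes():

/* Remount side: record the request rather than changing the option now. */
static void demo_request_inode_map_cache(struct btrfs_fs_info *info, bool on)
{
	if (on)
		btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
					   "enabling inode map caching");
	else
		btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
					     "disabling inode map caching");
}

/* Commit side: apply whatever was requested since the last transaction. */
static void demo_apply_pending(struct btrfs_fs_info *info)
{
	if (btrfs_test_pending(info, SET_INODE_MAP_CACHE))
		btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
	if (btrfs_test_pending(info, CLEAR_INODE_MAP_CACHE))
		btrfs_clear_opt(info->mount_opt, INODE_MAP_CACHE);
	info->pending_changes = 0;
}

Because pending_changes is manipulated with set_bit()/clear_bit()/test_bit(), the remount path and the commit path do not need a shared lock around the flag updates.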
@@ -3351,7 +3414,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, u64 group_start);
+			     struct btrfs_root *root, u64 group_start,
+			     struct extent_map *em);
 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root);
@@ -3427,8 +3491,8 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info);
 int __get_raid_index(u64 flags);
-int btrfs_start_nocow_write(struct btrfs_root *root);
-void btrfs_end_nocow_write(struct btrfs_root *root);
+int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot);
@@ -3686,6 +3750,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
		    struct extent_buffer *leaf,
		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -3857,6 +3925,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
				    struct btrfs_trans_handle *trans, int mode,
				    u64 start, u64 num_bytes, u64 min_size,
				    loff_t actual_len, u64 *alloc_hint);
+int btrfs_inode_check_errors(struct inode *inode);
 extern const struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
@@ -3901,6 +3970,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
		      struct page **pages, size_t num_pages,
		      loff_t pos, size_t write_bytes,
		      struct extent_state **cached);
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@@ -4097,7 +4167,12 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 /* dev-replace.c */
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
 void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
-void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info);
+void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
+
+static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
+{
+	btrfs_bio_counter_sub(fs_info, 1);
+}
 
 /* reada.c */
 struct reada_control {
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 6f662b34ba0e..ca6a3a3b6b6c 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -316,11 +316,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_device *src_device = NULL;
 
-	if (btrfs_fs_incompat(fs_info, RAID56)) {
-		btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
-		return -EOPNOTSUPP;
-	}
-
	switch (args->start.cont_reading_from_srcdev_mode) {
	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
@@ -422,9 +417,15 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
			      &dev_replace->scrub_progress, 0, 1);
 
	ret = btrfs_dev_replace_finishing(root->fs_info, ret);
-	WARN_ON(ret);
+	/* don't warn if EINPROGRESS, someone else might be running scrub */
+	if (ret == -EINPROGRESS) {
+		args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
+		ret = 0;
+	} else {
+		WARN_ON(ret);
+	}
 
-	return 0;
+	return ret;
 
 leave:
	dev_replace->srcdev = NULL;
@@ -542,7 +543,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 
-		return 0;
+		return scrub_ret;
	}
 
	printk_in_rcu(KERN_INFO
@@ -571,15 +572,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
	fs_info->fs_devices->rw_devices++;
 
-	/* replace the sysfs entry */
-	btrfs_kobj_rm_device(fs_info, src_device);
-	btrfs_kobj_add_device(fs_info, tgt_device);
-
	btrfs_dev_replace_unlock(dev_replace);
 
	btrfs_rm_dev_replace_blocked(fs_info);
 
-	btrfs_rm_dev_replace_srcdev(fs_info, src_device);
+	btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
 
	btrfs_rm_dev_replace_unblocked(fs_info);
 
@@ -594,6 +591,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
	mutex_unlock(&uuid_mutex);
 
+	/* replace the sysfs entry */
+	btrfs_kobj_rm_device(fs_info, src_device);
+	btrfs_kobj_add_device(fs_info, tgt_device);
+	btrfs_rm_dev_replace_free_srcdev(fs_info, src_device);
+
	/* write back the superblocks */
	trans = btrfs_start_transaction(root, 0);
	if (!IS_ERR(trans))
@@ -920,9 +922,9 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
	percpu_counter_inc(&fs_info->bio_counter);
 }
 
-void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
+void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 {
-	percpu_counter_dec(&fs_info->bio_counter);
+	percpu_counter_sub(&fs_info->bio_counter, amount);
 
	if (waitqueue_active(&fs_info->replace_wait))
		wake_up(&fs_info->replace_wait);
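btrfs_bio_counter_sub() generalizes the old _dec() so a whole batch of in-flight bios can be accounted for in one call (the ctree.h hunk above keeps btrfs_bio_counter_dec() as an inline wrapper), and the waitqueue is kicked whenever the count drops. The waiting side of such a scheme can be sketched as below; this drain helper is illustrative and is not the function dev-replace actually uses:

/* Illustrative waiter: block until every outstanding bio has been subtracted. */
static void demo_wait_for_bios(struct btrfs_fs_info *fs_info)
{
	wait_event(fs_info->replace_wait,
		   percpu_counter_sum(&fs_info->bio_counter) == 0);
}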
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index fc8df866e919..1752625fb4dd 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
	struct btrfs_dir_item *dir_item;
	unsigned long name_ptr;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1bf9f897065d..30965120772b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -2384,6 +2384,8 @@ int open_ctree(struct super_block *sb, | |||
2384 | init_waitqueue_head(&fs_info->transaction_blocked_wait); | 2384 | init_waitqueue_head(&fs_info->transaction_blocked_wait); |
2385 | init_waitqueue_head(&fs_info->async_submit_wait); | 2385 | init_waitqueue_head(&fs_info->async_submit_wait); |
2386 | 2386 | ||
2387 | INIT_LIST_HEAD(&fs_info->pinned_chunks); | ||
2388 | |||
2387 | ret = btrfs_alloc_stripe_hash_table(fs_info); | 2389 | ret = btrfs_alloc_stripe_hash_table(fs_info); |
2388 | if (ret) { | 2390 | if (ret) { |
2389 | err = ret; | 2391 | err = ret; |
@@ -2830,9 +2832,11 @@ retry_root_backup: | |||
2830 | btrfs_set_opt(fs_info->mount_opt, SSD); | 2832 | btrfs_set_opt(fs_info->mount_opt, SSD); |
2831 | } | 2833 | } |
2832 | 2834 | ||
2833 | /* Set the real inode map cache flag */ | 2835 | /* |
2834 | if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE)) | 2836 | * Mount does not set all options immediatelly, we can do it now and do |
2835 | btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE); | 2837 | * not have to wait for transaction commit |
2838 | */ | ||
2839 | btrfs_apply_pending_changes(fs_info); | ||
2836 | 2840 | ||
2837 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2841 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
2838 | if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { | 2842 | if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { |
@@ -3713,6 +3717,17 @@ void close_ctree(struct btrfs_root *root) | |||
3713 | 3717 | ||
3714 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3718 | btrfs_free_block_rsv(root, root->orphan_block_rsv); |
3715 | root->orphan_block_rsv = NULL; | 3719 | root->orphan_block_rsv = NULL; |
3720 | |||
3721 | lock_chunks(root); | ||
3722 | while (!list_empty(&fs_info->pinned_chunks)) { | ||
3723 | struct extent_map *em; | ||
3724 | |||
3725 | em = list_first_entry(&fs_info->pinned_chunks, | ||
3726 | struct extent_map, list); | ||
3727 | list_del_init(&em->list); | ||
3728 | free_extent_map(em); | ||
3729 | } | ||
3730 | unlock_chunks(root); | ||
3716 | } | 3731 | } |
3717 | 3732 | ||
3718 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 3733 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
@@ -3839,12 +3854,12 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3839 | */ | 3854 | */ |
3840 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) | 3855 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) |
3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3856 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
3842 | sb->root); | 3857 | btrfs_super_root(sb)); |
3843 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) | 3858 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) |
3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3859 | printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n", |
3845 | sb->chunk_root); | 3860 | btrfs_super_chunk_root(sb)); |
3846 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) | 3861 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) |
3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3862 | printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", |
3848 | btrfs_super_log_root(sb)); | 3863 | btrfs_super_log_root(sb)); |
3849 | 3864 | ||
3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | 3865 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { |
@@ -4129,6 +4144,25 @@ again: | |||
4129 | return 0; | 4144 | return 0; |
4130 | } | 4145 | } |
4131 | 4146 | ||
4147 | static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans, | ||
4148 | struct btrfs_fs_info *fs_info) | ||
4149 | { | ||
4150 | struct btrfs_ordered_extent *ordered; | ||
4151 | |||
4152 | spin_lock(&fs_info->trans_lock); | ||
4153 | while (!list_empty(&cur_trans->pending_ordered)) { | ||
4154 | ordered = list_first_entry(&cur_trans->pending_ordered, | ||
4155 | struct btrfs_ordered_extent, | ||
4156 | trans_list); | ||
4157 | list_del_init(&ordered->trans_list); | ||
4158 | spin_unlock(&fs_info->trans_lock); | ||
4159 | |||
4160 | btrfs_put_ordered_extent(ordered); | ||
4161 | spin_lock(&fs_info->trans_lock); | ||
4162 | } | ||
4163 | spin_unlock(&fs_info->trans_lock); | ||
4164 | } | ||
4165 | |||
4132 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | 4166 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, |
4133 | struct btrfs_root *root) | 4167 | struct btrfs_root *root) |
4134 | { | 4168 | { |
@@ -4140,6 +4174,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
4140 | cur_trans->state = TRANS_STATE_UNBLOCKED; | 4174 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
4141 | wake_up(&root->fs_info->transaction_wait); | 4175 | wake_up(&root->fs_info->transaction_wait); |
4142 | 4176 | ||
4177 | btrfs_free_pending_ordered(cur_trans, root->fs_info); | ||
4143 | btrfs_destroy_delayed_inodes(root); | 4178 | btrfs_destroy_delayed_inodes(root); |
4144 | btrfs_assert_delayed_root_empty(root); | 4179 | btrfs_assert_delayed_root_empty(root); |
4145 | 4180 | ||
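Both disk-io.c additions above drain a list that lives under a lock: the close path frees any leftover entries on fs_info->pinned_chunks while holding the chunk mutex, and btrfs_free_pending_ordered() walks cur_trans->pending_ordered under fs_info->trans_lock, dropping the spinlock around each btrfs_put_ordered_extent() because the final put may do work that must not run under a spinlock. A minimal sketch of that drop-and-retake drain pattern follows; struct my_entry, release_entry() and drain_pending_list() are placeholders, not btrfs names.

        struct my_entry {
                struct list_head list;
                /* payload ... */
        };

        static void drain_pending_list(spinlock_t *lock, struct list_head *head)
        {
                struct my_entry *e;

                spin_lock(lock);
                while (!list_empty(head)) {
                        e = list_first_entry(head, struct my_entry, list);
                        list_del_init(&e->list);
                        spin_unlock(lock);   /* the release path may sleep or take other locks */

                        release_entry(e);    /* stand-in for btrfs_put_ordered_extent() etc. */

                        spin_lock(lock);     /* retake before re-testing list_empty() */
                }
                spin_unlock(lock);
        }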
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47c1ba141082..222d6aea4a8a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -315,12 +315,6 @@ get_caching_control(struct btrfs_block_group_cache *cache) | |||
315 | struct btrfs_caching_control *ctl; | 315 | struct btrfs_caching_control *ctl; |
316 | 316 | ||
317 | spin_lock(&cache->lock); | 317 | spin_lock(&cache->lock); |
318 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
319 | spin_unlock(&cache->lock); | ||
320 | return NULL; | ||
321 | } | ||
322 | |||
323 | /* We're loading it the fast way, so we don't have a caching_ctl. */ | ||
324 | if (!cache->caching_ctl) { | 318 | if (!cache->caching_ctl) { |
325 | spin_unlock(&cache->lock); | 319 | spin_unlock(&cache->lock); |
326 | return NULL; | 320 | return NULL; |
@@ -594,6 +588,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
594 | spin_unlock(&cache->lock); | 588 | spin_unlock(&cache->lock); |
595 | 589 | ||
596 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { | 590 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { |
591 | mutex_lock(&caching_ctl->mutex); | ||
597 | ret = load_free_space_cache(fs_info, cache); | 592 | ret = load_free_space_cache(fs_info, cache); |
598 | 593 | ||
599 | spin_lock(&cache->lock); | 594 | spin_lock(&cache->lock); |
@@ -601,15 +596,19 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
601 | cache->caching_ctl = NULL; | 596 | cache->caching_ctl = NULL; |
602 | cache->cached = BTRFS_CACHE_FINISHED; | 597 | cache->cached = BTRFS_CACHE_FINISHED; |
603 | cache->last_byte_to_unpin = (u64)-1; | 598 | cache->last_byte_to_unpin = (u64)-1; |
599 | caching_ctl->progress = (u64)-1; | ||
604 | } else { | 600 | } else { |
605 | if (load_cache_only) { | 601 | if (load_cache_only) { |
606 | cache->caching_ctl = NULL; | 602 | cache->caching_ctl = NULL; |
607 | cache->cached = BTRFS_CACHE_NO; | 603 | cache->cached = BTRFS_CACHE_NO; |
608 | } else { | 604 | } else { |
609 | cache->cached = BTRFS_CACHE_STARTED; | 605 | cache->cached = BTRFS_CACHE_STARTED; |
606 | cache->has_caching_ctl = 1; | ||
610 | } | 607 | } |
611 | } | 608 | } |
612 | spin_unlock(&cache->lock); | 609 | spin_unlock(&cache->lock); |
610 | mutex_unlock(&caching_ctl->mutex); | ||
611 | |||
613 | wake_up(&caching_ctl->wait); | 612 | wake_up(&caching_ctl->wait); |
614 | if (ret == 1) { | 613 | if (ret == 1) { |
615 | put_caching_control(caching_ctl); | 614 | put_caching_control(caching_ctl); |
@@ -627,6 +626,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
627 | cache->cached = BTRFS_CACHE_NO; | 626 | cache->cached = BTRFS_CACHE_NO; |
628 | } else { | 627 | } else { |
629 | cache->cached = BTRFS_CACHE_STARTED; | 628 | cache->cached = BTRFS_CACHE_STARTED; |
629 | cache->has_caching_ctl = 1; | ||
630 | } | 630 | } |
631 | spin_unlock(&cache->lock); | 631 | spin_unlock(&cache->lock); |
632 | wake_up(&caching_ctl->wait); | 632 | wake_up(&caching_ctl->wait); |
@@ -3162,7 +3162,19 @@ next_block_group(struct btrfs_root *root, | |||
3162 | struct btrfs_block_group_cache *cache) | 3162 | struct btrfs_block_group_cache *cache) |
3163 | { | 3163 | { |
3164 | struct rb_node *node; | 3164 | struct rb_node *node; |
3165 | |||
3165 | spin_lock(&root->fs_info->block_group_cache_lock); | 3166 | spin_lock(&root->fs_info->block_group_cache_lock); |
3167 | |||
3168 | /* If our block group was removed, we need a full search. */ | ||
3169 | if (RB_EMPTY_NODE(&cache->cache_node)) { | ||
3170 | const u64 next_bytenr = cache->key.objectid + cache->key.offset; | ||
3171 | |||
3172 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
3173 | btrfs_put_block_group(cache); | ||
3174 | cache = btrfs_lookup_first_block_group(root->fs_info, | ||
3175 | next_bytenr); | ||
3176 | return cache; | ||
3177 | } | ||
3166 | node = rb_next(&cache->cache_node); | 3178 | node = rb_next(&cache->cache_node); |
3167 | btrfs_put_block_group(cache); | 3179 | btrfs_put_block_group(cache); |
3168 | if (node) { | 3180 | if (node) { |
@@ -3504,6 +3516,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3504 | found->chunk_alloc = 0; | 3516 | found->chunk_alloc = 0; |
3505 | found->flush = 0; | 3517 | found->flush = 0; |
3506 | init_waitqueue_head(&found->wait); | 3518 | init_waitqueue_head(&found->wait); |
3519 | INIT_LIST_HEAD(&found->ro_bgs); | ||
3507 | 3520 | ||
3508 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, | 3521 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, |
3509 | info->space_info_kobj, "%s", | 3522 | info->space_info_kobj, "%s", |
@@ -5425,7 +5438,17 @@ static int update_block_group(struct btrfs_root *root, | |||
5425 | spin_unlock(&cache->space_info->lock); | 5438 | spin_unlock(&cache->space_info->lock); |
5426 | } else { | 5439 | } else { |
5427 | old_val -= num_bytes; | 5440 | old_val -= num_bytes; |
5441 | btrfs_set_block_group_used(&cache->item, old_val); | ||
5442 | cache->pinned += num_bytes; | ||
5443 | cache->space_info->bytes_pinned += num_bytes; | ||
5444 | cache->space_info->bytes_used -= num_bytes; | ||
5445 | cache->space_info->disk_used -= num_bytes * factor; | ||
5446 | spin_unlock(&cache->lock); | ||
5447 | spin_unlock(&cache->space_info->lock); | ||
5428 | 5448 | ||
5449 | set_extent_dirty(info->pinned_extents, | ||
5450 | bytenr, bytenr + num_bytes - 1, | ||
5451 | GFP_NOFS | __GFP_NOFAIL); | ||
5429 | /* | 5452 | /* |
5430 | * No longer have used bytes in this block group, queue | 5453 | * No longer have used bytes in this block group, queue |
5431 | * it for deletion. | 5454 | * it for deletion. |
@@ -5439,17 +5462,6 @@ static int update_block_group(struct btrfs_root *root, | |||
5439 | } | 5462 | } |
5440 | spin_unlock(&info->unused_bgs_lock); | 5463 | spin_unlock(&info->unused_bgs_lock); |
5441 | } | 5464 | } |
5442 | btrfs_set_block_group_used(&cache->item, old_val); | ||
5443 | cache->pinned += num_bytes; | ||
5444 | cache->space_info->bytes_pinned += num_bytes; | ||
5445 | cache->space_info->bytes_used -= num_bytes; | ||
5446 | cache->space_info->disk_used -= num_bytes * factor; | ||
5447 | spin_unlock(&cache->lock); | ||
5448 | spin_unlock(&cache->space_info->lock); | ||
5449 | |||
5450 | set_extent_dirty(info->pinned_extents, | ||
5451 | bytenr, bytenr + num_bytes - 1, | ||
5452 | GFP_NOFS | __GFP_NOFAIL); | ||
5453 | } | 5465 | } |
5454 | btrfs_put_block_group(cache); | 5466 | btrfs_put_block_group(cache); |
5455 | total -= num_bytes; | 5467 | total -= num_bytes; |
@@ -8511,6 +8523,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) | |||
8511 | min_allocable_bytes <= sinfo->total_bytes) { | 8523 | min_allocable_bytes <= sinfo->total_bytes) { |
8512 | sinfo->bytes_readonly += num_bytes; | 8524 | sinfo->bytes_readonly += num_bytes; |
8513 | cache->ro = 1; | 8525 | cache->ro = 1; |
8526 | list_add_tail(&cache->ro_list, &sinfo->ro_bgs); | ||
8514 | ret = 0; | 8527 | ret = 0; |
8515 | } | 8528 | } |
8516 | out: | 8529 | out: |
@@ -8565,15 +8578,20 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
8565 | 8578 | ||
8566 | /* | 8579 | /* |
8567 | * helper to account the unused space of all the readonly block group in the | 8580 | * helper to account the unused space of all the readonly block group in the |
8568 | * list. takes mirrors into account. | 8581 | * space_info. takes mirrors into account. |
8569 | */ | 8582 | */ |
8570 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | 8583 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) |
8571 | { | 8584 | { |
8572 | struct btrfs_block_group_cache *block_group; | 8585 | struct btrfs_block_group_cache *block_group; |
8573 | u64 free_bytes = 0; | 8586 | u64 free_bytes = 0; |
8574 | int factor; | 8587 | int factor; |
8575 | 8588 | ||
8576 | list_for_each_entry(block_group, groups_list, list) { | 8589 | /* It's df, we don't care if it's racy */ |
8590 | if (list_empty(&sinfo->ro_bgs)) | ||
8591 | return 0; | ||
8592 | |||
8593 | spin_lock(&sinfo->lock); | ||
8594 | list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) { | ||
8577 | spin_lock(&block_group->lock); | 8595 | spin_lock(&block_group->lock); |
8578 | 8596 | ||
8579 | if (!block_group->ro) { | 8597 | if (!block_group->ro) { |
@@ -8594,26 +8612,6 @@ static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | |||
8594 | 8612 | ||
8595 | spin_unlock(&block_group->lock); | 8613 | spin_unlock(&block_group->lock); |
8596 | } | 8614 | } |
8597 | |||
8598 | return free_bytes; | ||
8599 | } | ||
8600 | |||
8601 | /* | ||
8602 | * helper to account the unused space of all the readonly block group in the | ||
8603 | * space_info. takes mirrors into account. | ||
8604 | */ | ||
8605 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8606 | { | ||
8607 | int i; | ||
8608 | u64 free_bytes = 0; | ||
8609 | |||
8610 | spin_lock(&sinfo->lock); | ||
8611 | |||
8612 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8613 | if (!list_empty(&sinfo->block_groups[i])) | ||
8614 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8615 | &sinfo->block_groups[i]); | ||
8616 | |||
8617 | spin_unlock(&sinfo->lock); | 8615 | spin_unlock(&sinfo->lock); |
8618 | 8616 | ||
8619 | return free_bytes; | 8617 | return free_bytes; |
@@ -8633,6 +8631,7 @@ void btrfs_set_block_group_rw(struct btrfs_root *root, | |||
8633 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 8631 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
8634 | sinfo->bytes_readonly -= num_bytes; | 8632 | sinfo->bytes_readonly -= num_bytes; |
8635 | cache->ro = 0; | 8633 | cache->ro = 0; |
8634 | list_del_init(&cache->ro_list); | ||
8636 | spin_unlock(&cache->lock); | 8635 | spin_unlock(&cache->lock); |
8637 | spin_unlock(&sinfo->lock); | 8636 | spin_unlock(&sinfo->lock); |
8638 | } | 8637 | } |
@@ -9002,7 +9001,9 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
9002 | INIT_LIST_HEAD(&cache->list); | 9001 | INIT_LIST_HEAD(&cache->list); |
9003 | INIT_LIST_HEAD(&cache->cluster_list); | 9002 | INIT_LIST_HEAD(&cache->cluster_list); |
9004 | INIT_LIST_HEAD(&cache->bg_list); | 9003 | INIT_LIST_HEAD(&cache->bg_list); |
9004 | INIT_LIST_HEAD(&cache->ro_list); | ||
9005 | btrfs_init_free_space_ctl(cache); | 9005 | btrfs_init_free_space_ctl(cache); |
9006 | atomic_set(&cache->trimming, 0); | ||
9006 | 9007 | ||
9007 | return cache; | 9008 | return cache; |
9008 | } | 9009 | } |
@@ -9195,9 +9196,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
9195 | int ret = 0; | 9196 | int ret = 0; |
9196 | 9197 | ||
9197 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { | 9198 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { |
9198 | list_del_init(&block_group->bg_list); | ||
9199 | if (ret) | 9199 | if (ret) |
9200 | continue; | 9200 | goto next; |
9201 | 9201 | ||
9202 | spin_lock(&block_group->lock); | 9202 | spin_lock(&block_group->lock); |
9203 | memcpy(&item, &block_group->item, sizeof(item)); | 9203 | memcpy(&item, &block_group->item, sizeof(item)); |
@@ -9212,6 +9212,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
9212 | key.objectid, key.offset); | 9212 | key.objectid, key.offset); |
9213 | if (ret) | 9213 | if (ret) |
9214 | btrfs_abort_transaction(trans, extent_root, ret); | 9214 | btrfs_abort_transaction(trans, extent_root, ret); |
9215 | next: | ||
9216 | list_del_init(&block_group->bg_list); | ||
9215 | } | 9217 | } |
9216 | } | 9218 | } |
9217 | 9219 | ||
@@ -9304,7 +9306,8 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
9304 | } | 9306 | } |
9305 | 9307 | ||
9306 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 9308 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
9307 | struct btrfs_root *root, u64 group_start) | 9309 | struct btrfs_root *root, u64 group_start, |
9310 | struct extent_map *em) | ||
9308 | { | 9311 | { |
9309 | struct btrfs_path *path; | 9312 | struct btrfs_path *path; |
9310 | struct btrfs_block_group_cache *block_group; | 9313 | struct btrfs_block_group_cache *block_group; |
@@ -9316,6 +9319,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9316 | int ret; | 9319 | int ret; |
9317 | int index; | 9320 | int index; |
9318 | int factor; | 9321 | int factor; |
9322 | struct btrfs_caching_control *caching_ctl = NULL; | ||
9323 | bool remove_em; | ||
9319 | 9324 | ||
9320 | root = root->fs_info->extent_root; | 9325 | root = root->fs_info->extent_root; |
9321 | 9326 | ||
@@ -9400,6 +9405,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9400 | spin_lock(&root->fs_info->block_group_cache_lock); | 9405 | spin_lock(&root->fs_info->block_group_cache_lock); |
9401 | rb_erase(&block_group->cache_node, | 9406 | rb_erase(&block_group->cache_node, |
9402 | &root->fs_info->block_group_cache_tree); | 9407 | &root->fs_info->block_group_cache_tree); |
9408 | RB_CLEAR_NODE(&block_group->cache_node); | ||
9403 | 9409 | ||
9404 | if (root->fs_info->first_logical_byte == block_group->key.objectid) | 9410 | if (root->fs_info->first_logical_byte == block_group->key.objectid) |
9405 | root->fs_info->first_logical_byte = (u64)-1; | 9411 | root->fs_info->first_logical_byte = (u64)-1; |
@@ -9411,6 +9417,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9411 | * are still on the list after taking the semaphore | 9417 | * are still on the list after taking the semaphore |
9412 | */ | 9418 | */ |
9413 | list_del_init(&block_group->list); | 9419 | list_del_init(&block_group->list); |
9420 | list_del_init(&block_group->ro_list); | ||
9414 | if (list_empty(&block_group->space_info->block_groups[index])) { | 9421 | if (list_empty(&block_group->space_info->block_groups[index])) { |
9415 | kobj = block_group->space_info->block_group_kobjs[index]; | 9422 | kobj = block_group->space_info->block_group_kobjs[index]; |
9416 | block_group->space_info->block_group_kobjs[index] = NULL; | 9423 | block_group->space_info->block_group_kobjs[index] = NULL; |
@@ -9422,8 +9429,32 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9422 | kobject_put(kobj); | 9429 | kobject_put(kobj); |
9423 | } | 9430 | } |
9424 | 9431 | ||
9432 | if (block_group->has_caching_ctl) | ||
9433 | caching_ctl = get_caching_control(block_group); | ||
9425 | if (block_group->cached == BTRFS_CACHE_STARTED) | 9434 | if (block_group->cached == BTRFS_CACHE_STARTED) |
9426 | wait_block_group_cache_done(block_group); | 9435 | wait_block_group_cache_done(block_group); |
9436 | if (block_group->has_caching_ctl) { | ||
9437 | down_write(&root->fs_info->commit_root_sem); | ||
9438 | if (!caching_ctl) { | ||
9439 | struct btrfs_caching_control *ctl; | ||
9440 | |||
9441 | list_for_each_entry(ctl, | ||
9442 | &root->fs_info->caching_block_groups, list) | ||
9443 | if (ctl->block_group == block_group) { | ||
9444 | caching_ctl = ctl; | ||
9445 | atomic_inc(&caching_ctl->count); | ||
9446 | break; | ||
9447 | } | ||
9448 | } | ||
9449 | if (caching_ctl) | ||
9450 | list_del_init(&caching_ctl->list); | ||
9451 | up_write(&root->fs_info->commit_root_sem); | ||
9452 | if (caching_ctl) { | ||
9453 | /* Once for the caching bgs list and once for us. */ | ||
9454 | put_caching_control(caching_ctl); | ||
9455 | put_caching_control(caching_ctl); | ||
9456 | } | ||
9457 | } | ||
9427 | 9458 | ||
9428 | btrfs_remove_free_space_cache(block_group); | 9459 | btrfs_remove_free_space_cache(block_group); |
9429 | 9460 | ||
@@ -9435,6 +9466,71 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9435 | 9466 | ||
9436 | memcpy(&key, &block_group->key, sizeof(key)); | 9467 | memcpy(&key, &block_group->key, sizeof(key)); |
9437 | 9468 | ||
9469 | lock_chunks(root); | ||
9470 | if (!list_empty(&em->list)) { | ||
9471 | /* We're in the transaction->pending_chunks list. */ | ||
9472 | free_extent_map(em); | ||
9473 | } | ||
9474 | spin_lock(&block_group->lock); | ||
9475 | block_group->removed = 1; | ||
9476 | /* | ||
9477 | * At this point trimming can't start on this block group, because we | ||
9478 | * removed the block group from the tree fs_info->block_group_cache_tree | ||
9479 | * so no one can find it anymore, and even if someone already got this | ||
9480 | * block group before we removed it from the rbtree, they have already | ||
9481 | * incremented block_group->trimming - if they didn't, they won't find | ||
9482 | * any free space entries because we already removed them all when we | ||
9483 | * called btrfs_remove_free_space_cache(). | ||
9484 | * | ||
9485 | * And we must not remove the extent map from the fs_info->mapping_tree | ||
9486 | * to prevent the same logical address range and physical device space | ||
9487 | * ranges from being reused for a new block group. This is because our | ||
9488 | * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is | ||
9489 | * completely transactionless, so while it is trimming a range the | ||
9490 | * currently running transaction might finish and a new one start, | ||
9491 | * allowing for new block groups to be created that can reuse the same | ||
9492 | * physical device locations unless we take this special care. | ||
9493 | */ | ||
9494 | remove_em = (atomic_read(&block_group->trimming) == 0); | ||
9495 | /* | ||
9496 | * Make sure a trimmer task always sees the em in the pinned_chunks list | ||
9497 | * if it sees block_group->removed == 1 (needs to lock block_group->lock | ||
9498 | * before checking block_group->removed). | ||
9499 | */ | ||
9500 | if (!remove_em) { | ||
9501 | /* | ||
9502 | * Our em might be in trans->transaction->pending_chunks which | ||
9503 | * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks), | ||
9504 | * and so is the fs_info->pinned_chunks list. | ||
9505 | * | ||
9506 | * So at this point we must be holding the chunk_mutex to avoid | ||
9507 | * any races with chunk allocation (more specifically at | ||
9508 | * volumes.c:contains_pending_extent()), to ensure it always | ||
9509 | * sees the em, either in the pending_chunks list or in the | ||
9510 | * pinned_chunks list. | ||
9511 | */ | ||
9512 | list_move_tail(&em->list, &root->fs_info->pinned_chunks); | ||
9513 | } | ||
9514 | spin_unlock(&block_group->lock); | ||
9515 | |||
9516 | if (remove_em) { | ||
9517 | struct extent_map_tree *em_tree; | ||
9518 | |||
9519 | em_tree = &root->fs_info->mapping_tree.map_tree; | ||
9520 | write_lock(&em_tree->lock); | ||
9521 | /* | ||
9522 | * The em might be in the pending_chunks list, so make sure the | ||
9523 | * chunk mutex is locked, since remove_extent_mapping() will | ||
9524 | * delete us from that list. | ||
9525 | */ | ||
9526 | remove_extent_mapping(em_tree, em); | ||
9527 | write_unlock(&em_tree->lock); | ||
9528 | /* once for the tree */ | ||
9529 | free_extent_map(em); | ||
9530 | } | ||
9531 | |||
9532 | unlock_chunks(root); | ||
9533 | |||
9438 | btrfs_put_block_group(block_group); | 9534 | btrfs_put_block_group(block_group); |
9439 | btrfs_put_block_group(block_group); | 9535 | btrfs_put_block_group(block_group); |
9440 | 9536 | ||
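The comment block above describes a two-sided handshake between block group removal and fstrim. Condensed from this hunk and from btrfs_trim_block_group() in fs/btrfs/free-space-cache.c further down, the protocol is roughly:

        /* Remover (btrfs_remove_block_group), under block_group->lock: */
        block_group->removed = 1;
        remove_em = (atomic_read(&block_group->trimming) == 0);
        if (!remove_em)
                /* a trimmer is active: keep the em pinned so the chunk
                 * allocator cannot reuse this logical/physical range yet */
                list_move_tail(&em->list, &fs_info->pinned_chunks);

        /* Trimmer (btrfs_trim_block_group), under the same lock: */
        if (block_group->removed)
                return 0;               /* too late, nothing left to trim */
        atomic_inc(&block_group->trimming);
        /* ... trim; whichever side sees trimming reach zero with removed
         * set removes the extent map from the mapping tree and frees it. */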
@@ -9523,10 +9619,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9523 | */ | 9619 | */ |
9524 | start = block_group->key.objectid; | 9620 | start = block_group->key.objectid; |
9525 | end = start + block_group->key.offset - 1; | 9621 | end = start + block_group->key.offset - 1; |
9526 | clear_extent_bits(&fs_info->freed_extents[0], start, end, | 9622 | ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, |
9527 | EXTENT_DIRTY, GFP_NOFS); | 9623 | EXTENT_DIRTY, GFP_NOFS); |
9528 | clear_extent_bits(&fs_info->freed_extents[1], start, end, | 9624 | if (ret) { |
9625 | btrfs_set_block_group_rw(root, block_group); | ||
9626 | goto end_trans; | ||
9627 | } | ||
9628 | ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, | ||
9529 | EXTENT_DIRTY, GFP_NOFS); | 9629 | EXTENT_DIRTY, GFP_NOFS); |
9630 | if (ret) { | ||
9631 | btrfs_set_block_group_rw(root, block_group); | ||
9632 | goto end_trans; | ||
9633 | } | ||
9530 | 9634 | ||
9531 | /* Reset pinned so btrfs_put_block_group doesn't complain */ | 9635 | /* Reset pinned so btrfs_put_block_group doesn't complain */ |
9532 | block_group->pinned = 0; | 9636 | block_group->pinned = 0; |
@@ -9537,6 +9641,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9537 | */ | 9641 | */ |
9538 | ret = btrfs_remove_chunk(trans, root, | 9642 | ret = btrfs_remove_chunk(trans, root, |
9539 | block_group->key.objectid); | 9643 | block_group->key.objectid); |
9644 | end_trans: | ||
9540 | btrfs_end_transaction(trans, root); | 9645 | btrfs_end_transaction(trans, root); |
9541 | next: | 9646 | next: |
9542 | btrfs_put_block_group(block_group); | 9647 | btrfs_put_block_group(block_group); |
@@ -9657,12 +9762,14 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
9657 | } | 9762 | } |
9658 | 9763 | ||
9659 | /* | 9764 | /* |
9660 | * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(), | 9765 | * btrfs_{start,end}_write_no_snapshoting() are similar to |
9661 | * they are used to prevent the some tasks writing data into the page cache | 9766 | * mnt_{want,drop}_write(), they are used to prevent some tasks from writing |
9662 | * by nocow before the subvolume is snapshoted, but flush the data into | 9767 | * data into the page cache through nocow before the subvolume is snapshoted, |
9663 | * the disk after the snapshot creation. | 9768 | * but flush the data into disk after the snapshot creation, or to prevent |
9769 | * operations while snapshoting is ongoing and that cause the snapshot to be | ||
9770 | * inconsistent (writes followed by expanding truncates for example). | ||
9664 | */ | 9771 | */ |
9665 | void btrfs_end_nocow_write(struct btrfs_root *root) | 9772 | void btrfs_end_write_no_snapshoting(struct btrfs_root *root) |
9666 | { | 9773 | { |
9667 | percpu_counter_dec(&root->subv_writers->counter); | 9774 | percpu_counter_dec(&root->subv_writers->counter); |
9668 | /* | 9775 | /* |
@@ -9674,7 +9781,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root) | |||
9674 | wake_up(&root->subv_writers->wait); | 9781 | wake_up(&root->subv_writers->wait); |
9675 | } | 9782 | } |
9676 | 9783 | ||
9677 | int btrfs_start_nocow_write(struct btrfs_root *root) | 9784 | int btrfs_start_write_no_snapshoting(struct btrfs_root *root) |
9678 | { | 9785 | { |
9679 | if (atomic_read(&root->will_be_snapshoted)) | 9786 | if (atomic_read(&root->will_be_snapshoted)) |
9680 | return 0; | 9787 | return 0; |
@@ -9685,7 +9792,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root) | |||
9685 | */ | 9792 | */ |
9686 | smp_mb(); | 9793 | smp_mb(); |
9687 | if (atomic_read(&root->will_be_snapshoted)) { | 9794 | if (atomic_read(&root->will_be_snapshoted)) { |
9688 | btrfs_end_nocow_write(root); | 9795 | btrfs_end_write_no_snapshoting(root); |
9689 | return 0; | 9796 | return 0; |
9690 | } | 9797 | } |
9691 | return 1; | 9798 | return 1; |
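The renamed helpers keep the old calling convention: btrfs_start_write_no_snapshoting() returns 0 when a snapshot is pending (the caller must fall back to COW) and 1 when the NOCOW write may proceed, in which case it is paired with btrfs_end_write_no_snapshoting(). A condensed caller sketch, mirroring check_can_nocow() in fs/btrfs/file.c below:

        if (!btrfs_start_write_no_snapshoting(root))
                return -ENOSPC;                 /* snapshot pending: do not NOCOW */

        ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
        if (ret <= 0) {
                /* cannot NOCOW this range after all, drop the writer count now */
                btrfs_end_write_no_snapshoting(root);
                return 0;
        }

        /* ... perform the NOCOW write ... */

        btrfs_end_write_no_snapshoting(root);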
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bf3f424e0013..4ebabd237153 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -595,9 +595,14 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
595 | clear = 1; | 595 | clear = 1; |
596 | again: | 596 | again: |
597 | if (!prealloc && (mask & __GFP_WAIT)) { | 597 | if (!prealloc && (mask & __GFP_WAIT)) { |
598 | /* | ||
599 | * Don't care for allocation failure here because we might end | ||
600 | * up not needing the pre-allocated extent state at all, which | ||
601 | * is the case if we only have in the tree extent states that | ||
602 | * cover our input range and don't extend over any other range. | ||
603 | * If we end up needing a new extent state we allocate it later. | ||
604 | */ | ||
598 | prealloc = alloc_extent_state(mask); | 605 | prealloc = alloc_extent_state(mask); |
599 | if (!prealloc) | ||
600 | return -ENOMEM; | ||
601 | } | 606 | } |
602 | 607 | ||
603 | spin_lock(&tree->lock); | 608 | spin_lock(&tree->lock); |
@@ -796,17 +801,25 @@ static void set_state_bits(struct extent_io_tree *tree, | |||
796 | state->state |= bits_to_set; | 801 | state->state |= bits_to_set; |
797 | } | 802 | } |
798 | 803 | ||
799 | static void cache_state(struct extent_state *state, | 804 | static void cache_state_if_flags(struct extent_state *state, |
800 | struct extent_state **cached_ptr) | 805 | struct extent_state **cached_ptr, |
806 | const u64 flags) | ||
801 | { | 807 | { |
802 | if (cached_ptr && !(*cached_ptr)) { | 808 | if (cached_ptr && !(*cached_ptr)) { |
803 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | 809 | if (!flags || (state->state & flags)) { |
804 | *cached_ptr = state; | 810 | *cached_ptr = state; |
805 | atomic_inc(&state->refs); | 811 | atomic_inc(&state->refs); |
806 | } | 812 | } |
807 | } | 813 | } |
808 | } | 814 | } |
809 | 815 | ||
816 | static void cache_state(struct extent_state *state, | ||
817 | struct extent_state **cached_ptr) | ||
818 | { | ||
819 | return cache_state_if_flags(state, cached_ptr, | ||
820 | EXTENT_IOBITS | EXTENT_BOUNDARY); | ||
821 | } | ||
822 | |||
810 | /* | 823 | /* |
811 | * set some bits on a range in the tree. This may require allocations or | 824 | * set some bits on a range in the tree. This may require allocations or |
812 | * sleeping, so the gfp mask is used to indicate what is allowed. | 825 | * sleeping, so the gfp mask is used to indicate what is allowed. |
@@ -1058,13 +1071,21 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1058 | int err = 0; | 1071 | int err = 0; |
1059 | u64 last_start; | 1072 | u64 last_start; |
1060 | u64 last_end; | 1073 | u64 last_end; |
1074 | bool first_iteration = true; | ||
1061 | 1075 | ||
1062 | btrfs_debug_check_extent_io_range(tree, start, end); | 1076 | btrfs_debug_check_extent_io_range(tree, start, end); |
1063 | 1077 | ||
1064 | again: | 1078 | again: |
1065 | if (!prealloc && (mask & __GFP_WAIT)) { | 1079 | if (!prealloc && (mask & __GFP_WAIT)) { |
1080 | /* | ||
1081 | * Best effort, don't worry if extent state allocation fails | ||
1082 | * here for the first iteration. We might have a cached state | ||
1083 | * that matches exactly the target range, in which case no | ||
1084 | * extent state allocations are needed. We'll only know this | ||
1085 | * after locking the tree. | ||
1086 | */ | ||
1066 | prealloc = alloc_extent_state(mask); | 1087 | prealloc = alloc_extent_state(mask); |
1067 | if (!prealloc) | 1088 | if (!prealloc && !first_iteration) |
1068 | return -ENOMEM; | 1089 | return -ENOMEM; |
1069 | } | 1090 | } |
1070 | 1091 | ||
@@ -1234,6 +1255,7 @@ search_again: | |||
1234 | spin_unlock(&tree->lock); | 1255 | spin_unlock(&tree->lock); |
1235 | if (mask & __GFP_WAIT) | 1256 | if (mask & __GFP_WAIT) |
1236 | cond_resched(); | 1257 | cond_resched(); |
1258 | first_iteration = false; | ||
1237 | goto again; | 1259 | goto again; |
1238 | } | 1260 | } |
1239 | 1261 | ||
@@ -1482,7 +1504,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | |||
1482 | state = find_first_extent_bit_state(tree, start, bits); | 1504 | state = find_first_extent_bit_state(tree, start, bits); |
1483 | got_it: | 1505 | got_it: |
1484 | if (state) { | 1506 | if (state) { |
1485 | cache_state(state, cached_state); | 1507 | cache_state_if_flags(state, cached_state, 0); |
1486 | *start_ret = state->start; | 1508 | *start_ret = state->start; |
1487 | *end_ret = state->end; | 1509 | *end_ret = state->end; |
1488 | ret = 0; | 1510 | ret = 0; |
@@ -1746,6 +1768,9 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | |||
1746 | if (page_ops == 0) | 1768 | if (page_ops == 0) |
1747 | return 0; | 1769 | return 0; |
1748 | 1770 | ||
1771 | if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0) | ||
1772 | mapping_set_error(inode->i_mapping, -EIO); | ||
1773 | |||
1749 | while (nr_pages > 0) { | 1774 | while (nr_pages > 0) { |
1750 | ret = find_get_pages_contig(inode->i_mapping, index, | 1775 | ret = find_get_pages_contig(inode->i_mapping, index, |
1751 | min_t(unsigned long, | 1776 | min_t(unsigned long, |
@@ -1763,6 +1788,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, | |||
1763 | clear_page_dirty_for_io(pages[i]); | 1788 | clear_page_dirty_for_io(pages[i]); |
1764 | if (page_ops & PAGE_SET_WRITEBACK) | 1789 | if (page_ops & PAGE_SET_WRITEBACK) |
1765 | set_page_writeback(pages[i]); | 1790 | set_page_writeback(pages[i]); |
1791 | if (page_ops & PAGE_SET_ERROR) | ||
1792 | SetPageError(pages[i]); | ||
1766 | if (page_ops & PAGE_END_WRITEBACK) | 1793 | if (page_ops & PAGE_END_WRITEBACK) |
1767 | end_page_writeback(pages[i]); | 1794 | end_page_writeback(pages[i]); |
1768 | if (page_ops & PAGE_UNLOCK) | 1795 | if (page_ops & PAGE_UNLOCK) |
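Both allocation hunks above rely on the same opportunistic-preallocation idea: try to allocate an extent_state before taking tree->lock but tolerate failure, because if the existing states already cover the requested range exactly, no new state is needed; the missing preallocation only matters once a split or insert is actually required. A rough sketch of the pattern (need_split is a placeholder condition, not a real field):

        struct extent_state *prealloc = NULL;

        if (!prealloc && (mask & __GFP_WAIT))
                prealloc = alloc_extent_state(mask);    /* failure is tolerated here */

        spin_lock(&tree->lock);
        /* ... walk the tree; if the cached or looked-up state matches the
         * requested range exactly, prealloc is never consumed ... */
        if (need_split && !prealloc) {
                /* only now is the allocation mandatory */
                spin_unlock(&tree->lock);
                return -ENOMEM;         /* or retry with a blocking allocation */
        }
        spin_unlock(&tree->lock);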
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6d4b938be986..ece9ce87edff 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -49,6 +49,7 @@ | |||
49 | #define PAGE_SET_WRITEBACK (1 << 2) | 49 | #define PAGE_SET_WRITEBACK (1 << 2) |
50 | #define PAGE_END_WRITEBACK (1 << 3) | 50 | #define PAGE_END_WRITEBACK (1 << 3) |
51 | #define PAGE_SET_PRIVATE2 (1 << 4) | 51 | #define PAGE_SET_PRIVATE2 (1 << 4) |
52 | #define PAGE_SET_ERROR (1 << 5) | ||
52 | 53 | ||
53 | /* | 54 | /* |
54 | * page->private values. Every page that is controlled by the extent | 55 | * page->private values. Every page that is controlled by the extent |
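PAGE_SET_ERROR is consumed by extent_clear_unlock_delalloc() in the extent_io.c hunk above; as the inode.c changes below show, a caller that wants failed delalloc pages flagged simply ORs it into the page_ops mask, for example (sketch):

        unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
                                 PAGE_SET_WRITEBACK | PAGE_SET_ERROR |
                                 PAGE_END_WRITEBACK;
        /* passed as the page_ops argument of extent_clear_unlock_delalloc(),
         * which then calls mapping_set_error() and SetPageError() on the range */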
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 225302b39afb..6a98bddd8f33 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -287,8 +287,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | |||
287 | if (!em) | 287 | if (!em) |
288 | goto out; | 288 | goto out; |
289 | 289 | ||
290 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | ||
291 | list_move(&em->list, &tree->modified_extents); | ||
292 | em->generation = gen; | 290 | em->generation = gen; |
293 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 291 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
294 | em->mod_start = em->start; | 292 | em->mod_start = em->start; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a18ceabd99a8..e4090259569b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1428,7 +1428,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1428 | u64 num_bytes; | 1428 | u64 num_bytes; |
1429 | int ret; | 1429 | int ret; |
1430 | 1430 | ||
1431 | ret = btrfs_start_nocow_write(root); | 1431 | ret = btrfs_start_write_no_snapshoting(root); |
1432 | if (!ret) | 1432 | if (!ret) |
1433 | return -ENOSPC; | 1433 | return -ENOSPC; |
1434 | 1434 | ||
@@ -1451,7 +1451,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1451 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1451 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
1452 | if (ret <= 0) { | 1452 | if (ret <= 0) { |
1453 | ret = 0; | 1453 | ret = 0; |
1454 | btrfs_end_nocow_write(root); | 1454 | btrfs_end_write_no_snapshoting(root); |
1455 | } else { | 1455 | } else { |
1456 | *write_bytes = min_t(size_t, *write_bytes , | 1456 | *write_bytes = min_t(size_t, *write_bytes , |
1457 | num_bytes - pos + lockstart); | 1457 | num_bytes - pos + lockstart); |
@@ -1543,7 +1543,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1543 | btrfs_free_reserved_data_space(inode, | 1543 | btrfs_free_reserved_data_space(inode, |
1544 | reserve_bytes); | 1544 | reserve_bytes); |
1545 | else | 1545 | else |
1546 | btrfs_end_nocow_write(root); | 1546 | btrfs_end_write_no_snapshoting(root); |
1547 | break; | 1547 | break; |
1548 | } | 1548 | } |
1549 | 1549 | ||
@@ -1632,7 +1632,7 @@ again: | |||
1632 | 1632 | ||
1633 | release_bytes = 0; | 1633 | release_bytes = 0; |
1634 | if (only_release_metadata) | 1634 | if (only_release_metadata) |
1635 | btrfs_end_nocow_write(root); | 1635 | btrfs_end_write_no_snapshoting(root); |
1636 | 1636 | ||
1637 | if (only_release_metadata && copied > 0) { | 1637 | if (only_release_metadata && copied > 0) { |
1638 | u64 lockstart = round_down(pos, root->sectorsize); | 1638 | u64 lockstart = round_down(pos, root->sectorsize); |
@@ -1661,7 +1661,7 @@ again: | |||
1661 | 1661 | ||
1662 | if (release_bytes) { | 1662 | if (release_bytes) { |
1663 | if (only_release_metadata) { | 1663 | if (only_release_metadata) { |
1664 | btrfs_end_nocow_write(root); | 1664 | btrfs_end_write_no_snapshoting(root); |
1665 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1665 | btrfs_delalloc_release_metadata(inode, release_bytes); |
1666 | } else { | 1666 | } else { |
1667 | btrfs_delalloc_release_space(inode, release_bytes); | 1667 | btrfs_delalloc_release_space(inode, release_bytes); |
@@ -1676,6 +1676,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, | |||
1676 | loff_t pos) | 1676 | loff_t pos) |
1677 | { | 1677 | { |
1678 | struct file *file = iocb->ki_filp; | 1678 | struct file *file = iocb->ki_filp; |
1679 | struct inode *inode = file_inode(file); | ||
1679 | ssize_t written; | 1680 | ssize_t written; |
1680 | ssize_t written_buffered; | 1681 | ssize_t written_buffered; |
1681 | loff_t endbyte; | 1682 | loff_t endbyte; |
@@ -1692,8 +1693,15 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, | |||
1692 | err = written_buffered; | 1693 | err = written_buffered; |
1693 | goto out; | 1694 | goto out; |
1694 | } | 1695 | } |
1696 | /* | ||
1697 | * Ensure all data is persisted. We want the next direct IO read to be | ||
1698 | * able to read what was just written. | ||
1699 | */ | ||
1695 | endbyte = pos + written_buffered - 1; | 1700 | endbyte = pos + written_buffered - 1; |
1696 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | 1701 | err = btrfs_fdatawrite_range(inode, pos, endbyte); |
1702 | if (err) | ||
1703 | goto out; | ||
1704 | err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte); | ||
1697 | if (err) | 1705 | if (err) |
1698 | goto out; | 1706 | goto out; |
1699 | written += written_buffered; | 1707 | written += written_buffered; |
@@ -1854,10 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) | |||
1854 | int ret; | 1862 | int ret; |
1855 | 1863 | ||
1856 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1864 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1857 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | 1865 | ret = btrfs_fdatawrite_range(inode, start, end); |
1858 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1859 | &BTRFS_I(inode)->runtime_flags)) | ||
1860 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1861 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1866 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1862 | 1867 | ||
1863 | return ret; | 1868 | return ret; |
@@ -2810,3 +2815,29 @@ int btrfs_auto_defrag_init(void) | |||
2810 | 2815 | ||
2811 | return 0; | 2816 | return 0; |
2812 | } | 2817 | } |
2818 | |||
2819 | int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) | ||
2820 | { | ||
2821 | int ret; | ||
2822 | |||
2823 | /* | ||
2824 | * So with compression we will find and lock a dirty page and clear the | ||
2825 | * first one as dirty, setup an async extent, and immediately return | ||
2826 | * with the entire range locked but with nobody actually marked with | ||
2827 | * writeback. So we can't just filemap_write_and_wait_range() and | ||
2828 | * expect it to work since it will just kick off a thread to do the | ||
2829 | * actual work. So we need to call filemap_fdatawrite_range _again_ | ||
2830 | * since it will wait on the page lock, which won't be unlocked until | ||
2831 | * after the pages have been marked as writeback and so we're good to go | ||
2832 | * from there. We have to do this otherwise we'll miss the ordered | ||
2833 | * extents and that results in badness. Please Josef, do not think you | ||
2834 | * know better and pull this out at some point in the future, it is | ||
2835 | * right and you are wrong. | ||
2836 | */ | ||
2837 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
2838 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
2839 | &BTRFS_I(inode)->runtime_flags)) | ||
2840 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
2841 | |||
2842 | return ret; | ||
2843 | } | ||
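The long comment in btrfs_fdatawrite_range() is why __btrfs_direct_write() above stops using filemap_write_and_wait_range(): with compression, the first flush may only queue async extents, so the range has to be written again before it can be waited on. Callers therefore split the two steps, as __btrfs_direct_write() and start_ordered_ops() now do; condensed:

        /* flush (twice internally if async compressed extents are pending), then wait */
        err = btrfs_fdatawrite_range(inode, start, end);
        if (!err)
                err = filemap_fdatawait_range(inode->i_mapping, start, end);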
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 33848196550e..030847bf7cec 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -27,10 +27,17 @@ | |||
27 | #include "disk-io.h" | 27 | #include "disk-io.h" |
28 | #include "extent_io.h" | 28 | #include "extent_io.h" |
29 | #include "inode-map.h" | 29 | #include "inode-map.h" |
30 | #include "volumes.h" | ||
30 | 31 | ||
31 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 32 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
32 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 33 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
33 | 34 | ||
35 | struct btrfs_trim_range { | ||
36 | u64 start; | ||
37 | u64 bytes; | ||
38 | struct list_head list; | ||
39 | }; | ||
40 | |||
34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, | 41 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
35 | struct btrfs_free_space *info); | 42 | struct btrfs_free_space *info); |
36 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | 43 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, |
@@ -881,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
881 | int ret; | 888 | int ret; |
882 | struct btrfs_free_cluster *cluster = NULL; | 889 | struct btrfs_free_cluster *cluster = NULL; |
883 | struct rb_node *node = rb_first(&ctl->free_space_offset); | 890 | struct rb_node *node = rb_first(&ctl->free_space_offset); |
891 | struct btrfs_trim_range *trim_entry; | ||
884 | 892 | ||
885 | /* Get the cluster for this block_group if it exists */ | 893 | /* Get the cluster for this block_group if it exists */ |
886 | if (block_group && !list_empty(&block_group->cluster_list)) { | 894 | if (block_group && !list_empty(&block_group->cluster_list)) { |
@@ -916,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
916 | cluster = NULL; | 924 | cluster = NULL; |
917 | } | 925 | } |
918 | } | 926 | } |
927 | |||
928 | /* | ||
929 | * Make sure we don't miss any range that was removed from our rbtree | ||
930 | * because trimming is running. Otherwise after a umount+mount (or crash | ||
931 | * after committing the transaction) we would leak free space and get | ||
932 | * an inconsistent free space cache report from fsck. | ||
933 | */ | ||
934 | list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { | ||
935 | ret = io_ctl_add_entry(io_ctl, trim_entry->start, | ||
936 | trim_entry->bytes, NULL); | ||
937 | if (ret) | ||
938 | goto fail; | ||
939 | *entries += 1; | ||
940 | } | ||
941 | |||
919 | return 0; | 942 | return 0; |
920 | fail: | 943 | fail: |
921 | return -ENOSPC; | 944 | return -ENOSPC; |
@@ -1135,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1135 | 1158 | ||
1136 | io_ctl_set_generation(&io_ctl, trans->transid); | 1159 | io_ctl_set_generation(&io_ctl, trans->transid); |
1137 | 1160 | ||
1161 | mutex_lock(&ctl->cache_writeout_mutex); | ||
1138 | /* Write out the extent entries in the free space cache */ | 1162 | /* Write out the extent entries in the free space cache */ |
1139 | ret = write_cache_extent_entries(&io_ctl, ctl, | 1163 | ret = write_cache_extent_entries(&io_ctl, ctl, |
1140 | block_group, &entries, &bitmaps, | 1164 | block_group, &entries, &bitmaps, |
1141 | &bitmap_list); | 1165 | &bitmap_list); |
1142 | if (ret) | 1166 | if (ret) { |
1167 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1143 | goto out_nospc; | 1168 | goto out_nospc; |
1169 | } | ||
1144 | 1170 | ||
1145 | /* | 1171 | /* |
1146 | * Some spaces that are freed in the current transaction are pinned, | 1172 | * Some spaces that are freed in the current transaction are pinned, |
@@ -1148,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1148 | * committed, we shouldn't lose them. | 1174 | * committed, we shouldn't lose them. |
1149 | */ | 1175 | */ |
1150 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); | 1176 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); |
1151 | if (ret) | 1177 | if (ret) { |
1178 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1152 | goto out_nospc; | 1179 | goto out_nospc; |
1180 | } | ||
1153 | 1181 | ||
1154 | /* At last, we write out all the bitmaps. */ | 1182 | /* |
1183 | * At last, we write out all the bitmaps and keep cache_writeout_mutex | ||
1184 | * locked while doing it because a concurrent trim can be manipulating | ||
1185 | * or freeing the bitmap. | ||
1186 | */ | ||
1155 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); | 1187 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); |
1188 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1156 | if (ret) | 1189 | if (ret) |
1157 | goto out_nospc; | 1190 | goto out_nospc; |
1158 | 1191 | ||
@@ -2295,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) | |||
2295 | ctl->start = block_group->key.objectid; | 2328 | ctl->start = block_group->key.objectid; |
2296 | ctl->private = block_group; | 2329 | ctl->private = block_group; |
2297 | ctl->op = &free_space_op; | 2330 | ctl->op = &free_space_op; |
2331 | INIT_LIST_HEAD(&ctl->trimming_ranges); | ||
2332 | mutex_init(&ctl->cache_writeout_mutex); | ||
2298 | 2333 | ||
2299 | /* | 2334 | /* |
2300 | * we only want to have 32k of ram per block group for keeping | 2335 | * we only want to have 32k of ram per block group for keeping |
@@ -2911,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2911 | 2946 | ||
2912 | static int do_trimming(struct btrfs_block_group_cache *block_group, | 2947 | static int do_trimming(struct btrfs_block_group_cache *block_group, |
2913 | u64 *total_trimmed, u64 start, u64 bytes, | 2948 | u64 *total_trimmed, u64 start, u64 bytes, |
2914 | u64 reserved_start, u64 reserved_bytes) | 2949 | u64 reserved_start, u64 reserved_bytes, |
2950 | struct btrfs_trim_range *trim_entry) | ||
2915 | { | 2951 | { |
2916 | struct btrfs_space_info *space_info = block_group->space_info; | 2952 | struct btrfs_space_info *space_info = block_group->space_info; |
2917 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 2953 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
2954 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2918 | int ret; | 2955 | int ret; |
2919 | int update = 0; | 2956 | int update = 0; |
2920 | u64 trimmed = 0; | 2957 | u64 trimmed = 0; |
@@ -2934,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group, | |||
2934 | if (!ret) | 2971 | if (!ret) |
2935 | *total_trimmed += trimmed; | 2972 | *total_trimmed += trimmed; |
2936 | 2973 | ||
2974 | mutex_lock(&ctl->cache_writeout_mutex); | ||
2937 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); | 2975 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); |
2976 | list_del(&trim_entry->list); | ||
2977 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2938 | 2978 | ||
2939 | if (update) { | 2979 | if (update) { |
2940 | spin_lock(&space_info->lock); | 2980 | spin_lock(&space_info->lock); |
@@ -2962,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2962 | u64 bytes; | 3002 | u64 bytes; |
2963 | 3003 | ||
2964 | while (start < end) { | 3004 | while (start < end) { |
3005 | struct btrfs_trim_range trim_entry; | ||
3006 | |||
3007 | mutex_lock(&ctl->cache_writeout_mutex); | ||
2965 | spin_lock(&ctl->tree_lock); | 3008 | spin_lock(&ctl->tree_lock); |
2966 | 3009 | ||
2967 | if (ctl->free_space < minlen) { | 3010 | if (ctl->free_space < minlen) { |
2968 | spin_unlock(&ctl->tree_lock); | 3011 | spin_unlock(&ctl->tree_lock); |
3012 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2969 | break; | 3013 | break; |
2970 | } | 3014 | } |
2971 | 3015 | ||
2972 | entry = tree_search_offset(ctl, start, 0, 1); | 3016 | entry = tree_search_offset(ctl, start, 0, 1); |
2973 | if (!entry) { | 3017 | if (!entry) { |
2974 | spin_unlock(&ctl->tree_lock); | 3018 | spin_unlock(&ctl->tree_lock); |
3019 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2975 | break; | 3020 | break; |
2976 | } | 3021 | } |
2977 | 3022 | ||
@@ -2980,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2980 | node = rb_next(&entry->offset_index); | 3025 | node = rb_next(&entry->offset_index); |
2981 | if (!node) { | 3026 | if (!node) { |
2982 | spin_unlock(&ctl->tree_lock); | 3027 | spin_unlock(&ctl->tree_lock); |
3028 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2983 | goto out; | 3029 | goto out; |
2984 | } | 3030 | } |
2985 | entry = rb_entry(node, struct btrfs_free_space, | 3031 | entry = rb_entry(node, struct btrfs_free_space, |
@@ -2988,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2988 | 3034 | ||
2989 | if (entry->offset >= end) { | 3035 | if (entry->offset >= end) { |
2990 | spin_unlock(&ctl->tree_lock); | 3036 | spin_unlock(&ctl->tree_lock); |
3037 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2991 | break; | 3038 | break; |
2992 | } | 3039 | } |
2993 | 3040 | ||
@@ -2997,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2997 | bytes = min(extent_start + extent_bytes, end) - start; | 3044 | bytes = min(extent_start + extent_bytes, end) - start; |
2998 | if (bytes < minlen) { | 3045 | if (bytes < minlen) { |
2999 | spin_unlock(&ctl->tree_lock); | 3046 | spin_unlock(&ctl->tree_lock); |
3047 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3000 | goto next; | 3048 | goto next; |
3001 | } | 3049 | } |
3002 | 3050 | ||
@@ -3004,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
3004 | kmem_cache_free(btrfs_free_space_cachep, entry); | 3052 | kmem_cache_free(btrfs_free_space_cachep, entry); |
3005 | 3053 | ||
3006 | spin_unlock(&ctl->tree_lock); | 3054 | spin_unlock(&ctl->tree_lock); |
3055 | trim_entry.start = extent_start; | ||
3056 | trim_entry.bytes = extent_bytes; | ||
3057 | list_add_tail(&trim_entry.list, &ctl->trimming_ranges); | ||
3058 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3007 | 3059 | ||
3008 | ret = do_trimming(block_group, total_trimmed, start, bytes, | 3060 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
3009 | extent_start, extent_bytes); | 3061 | extent_start, extent_bytes, &trim_entry); |
3010 | if (ret) | 3062 | if (ret) |
3011 | break; | 3063 | break; |
3012 | next: | 3064 | next: |
@@ -3035,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3035 | 3087 | ||
3036 | while (offset < end) { | 3088 | while (offset < end) { |
3037 | bool next_bitmap = false; | 3089 | bool next_bitmap = false; |
3090 | struct btrfs_trim_range trim_entry; | ||
3038 | 3091 | ||
3092 | mutex_lock(&ctl->cache_writeout_mutex); | ||
3039 | spin_lock(&ctl->tree_lock); | 3093 | spin_lock(&ctl->tree_lock); |
3040 | 3094 | ||
3041 | if (ctl->free_space < minlen) { | 3095 | if (ctl->free_space < minlen) { |
3042 | spin_unlock(&ctl->tree_lock); | 3096 | spin_unlock(&ctl->tree_lock); |
3097 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3043 | break; | 3098 | break; |
3044 | } | 3099 | } |
3045 | 3100 | ||
3046 | entry = tree_search_offset(ctl, offset, 1, 0); | 3101 | entry = tree_search_offset(ctl, offset, 1, 0); |
3047 | if (!entry) { | 3102 | if (!entry) { |
3048 | spin_unlock(&ctl->tree_lock); | 3103 | spin_unlock(&ctl->tree_lock); |
3104 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3049 | next_bitmap = true; | 3105 | next_bitmap = true; |
3050 | goto next; | 3106 | goto next; |
3051 | } | 3107 | } |
@@ -3054,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3054 | ret2 = search_bitmap(ctl, entry, &start, &bytes); | 3110 | ret2 = search_bitmap(ctl, entry, &start, &bytes); |
3055 | if (ret2 || start >= end) { | 3111 | if (ret2 || start >= end) { |
3056 | spin_unlock(&ctl->tree_lock); | 3112 | spin_unlock(&ctl->tree_lock); |
3113 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3057 | next_bitmap = true; | 3114 | next_bitmap = true; |
3058 | goto next; | 3115 | goto next; |
3059 | } | 3116 | } |
@@ -3061,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3061 | bytes = min(bytes, end - start); | 3118 | bytes = min(bytes, end - start); |
3062 | if (bytes < minlen) { | 3119 | if (bytes < minlen) { |
3063 | spin_unlock(&ctl->tree_lock); | 3120 | spin_unlock(&ctl->tree_lock); |
3121 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3064 | goto next; | 3122 | goto next; |
3065 | } | 3123 | } |
3066 | 3124 | ||
@@ -3069,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3069 | free_bitmap(ctl, entry); | 3127 | free_bitmap(ctl, entry); |
3070 | 3128 | ||
3071 | spin_unlock(&ctl->tree_lock); | 3129 | spin_unlock(&ctl->tree_lock); |
3130 | trim_entry.start = start; | ||
3131 | trim_entry.bytes = bytes; | ||
3132 | list_add_tail(&trim_entry.list, &ctl->trimming_ranges); | ||
3133 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3072 | 3134 | ||
3073 | ret = do_trimming(block_group, total_trimmed, start, bytes, | 3135 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
3074 | start, bytes); | 3136 | start, bytes, &trim_entry); |
3075 | if (ret) | 3137 | if (ret) |
3076 | break; | 3138 | break; |
3077 | next: | 3139 | next: |
@@ -3101,11 +3163,52 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
3101 | 3163 | ||
3102 | *trimmed = 0; | 3164 | *trimmed = 0; |
3103 | 3165 | ||
3166 | spin_lock(&block_group->lock); | ||
3167 | if (block_group->removed) { | ||
3168 | spin_unlock(&block_group->lock); | ||
3169 | return 0; | ||
3170 | } | ||
3171 | atomic_inc(&block_group->trimming); | ||
3172 | spin_unlock(&block_group->lock); | ||
3173 | |||
3104 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | 3174 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); |
3105 | if (ret) | 3175 | if (ret) |
3106 | return ret; | 3176 | goto out; |
3107 | 3177 | ||
3108 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | 3178 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); |
3179 | out: | ||
3180 | spin_lock(&block_group->lock); | ||
3181 | if (atomic_dec_and_test(&block_group->trimming) && | ||
3182 | block_group->removed) { | ||
3183 | struct extent_map_tree *em_tree; | ||
3184 | struct extent_map *em; | ||
3185 | |||
3186 | spin_unlock(&block_group->lock); | ||
3187 | |||
3188 | em_tree = &block_group->fs_info->mapping_tree.map_tree; | ||
3189 | write_lock(&em_tree->lock); | ||
3190 | em = lookup_extent_mapping(em_tree, block_group->key.objectid, | ||
3191 | 1); | ||
3192 | BUG_ON(!em); /* logic error, can't happen */ | ||
3193 | remove_extent_mapping(em_tree, em); | ||
3194 | write_unlock(&em_tree->lock); | ||
3195 | |||
3196 | lock_chunks(block_group->fs_info->chunk_root); | ||
3197 | list_del_init(&em->list); | ||
3198 | unlock_chunks(block_group->fs_info->chunk_root); | ||
3199 | |||
3200 | /* once for us and once for the tree */ | ||
3201 | free_extent_map(em); | ||
3202 | free_extent_map(em); | ||
3203 | |||
3204 | /* | ||
3205 | * We've left one free space entry and other tasks trimming | ||
3206 | * this block group have left 1 entry each one. Free them. | ||
3207 | */ | ||
3208 | __btrfs_remove_free_space_cache(block_group->free_space_ctl); | ||
3209 | } else { | ||
3210 | spin_unlock(&block_group->lock); | ||
3211 | } | ||
3109 | 3212 | ||
3110 | return ret; | 3213 | return ret; |
3111 | } | 3214 | } |
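The new trimming_ranges list and cache_writeout_mutex close the window in which an extent has been unlinked from the free space rbtree for discard but not yet re-added: the trimmer publishes the range before discarding it, and the cache writer includes every published range in the on-disk cache. Condensed from the hunks above (error handling trimmed):

        /* trim side: publish the extent before handing it to do_trimming() */
        mutex_lock(&ctl->cache_writeout_mutex);
        trim_entry.start = extent_start;
        trim_entry.bytes = extent_bytes;
        list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
        mutex_unlock(&ctl->cache_writeout_mutex);

        ret = do_trimming(block_group, total_trimmed, start, bytes,
                          extent_start, extent_bytes, &trim_entry);
        /* do_trimming() re-adds the free space and deletes trim_entry,
         * again under cache_writeout_mutex */

        /* writeout side: runs with cache_writeout_mutex held, so it sees
         * every range that is currently being trimmed */
        list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
                ret = io_ctl_add_entry(io_ctl, trim_entry->start,
                                       trim_entry->bytes, NULL);
                if (ret)
                        goto fail;
                *entries += 1;
        }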
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 0cf4977ef70d..88b2238a0aed 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -38,6 +38,8 @@ struct btrfs_free_space_ctl { | |||
38 | u64 start; | 38 | u64 start; |
39 | struct btrfs_free_space_op *op; | 39 | struct btrfs_free_space_op *op; |
40 | void *private; | 40 | void *private; |
41 | struct mutex cache_writeout_mutex; | ||
42 | struct list_head trimming_ranges; | ||
41 | }; | 43 | }; |
42 | 44 | ||
43 | struct btrfs_free_space_op { | 45 | struct btrfs_free_space_op { |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 83d646bd2e4b..74faea3a516e 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -178,7 +178,7 @@ static void start_caching(struct btrfs_root *root) | |||
178 | root->root_key.objectid); | 178 | root->root_key.objectid); |
179 | if (IS_ERR(tsk)) { | 179 | if (IS_ERR(tsk)) { |
180 | btrfs_warn(root->fs_info, "failed to start inode caching task"); | 180 | btrfs_warn(root->fs_info, "failed to start inode caching task"); |
181 | btrfs_clear_and_info(root, CHANGE_INODE_CACHE, | 181 | btrfs_clear_pending_and_info(root->fs_info, INODE_MAP_CACHE, |
182 | "disabling inode map caching"); | 182 | "disabling inode map caching"); |
183 | } | 183 | } |
184 | } | 184 | } |
@@ -364,6 +364,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root) | |||
364 | ctl->start = 0; | 364 | ctl->start = 0; |
365 | ctl->private = NULL; | 365 | ctl->private = NULL; |
366 | ctl->op = &free_ino_op; | 366 | ctl->op = &free_ino_op; |
367 | INIT_LIST_HEAD(&ctl->trimming_ranges); | ||
368 | mutex_init(&ctl->cache_writeout_mutex); | ||
367 | 369 | ||
368 | /* | 370 | /* |
369 | * Initially we allow to use 16K of ram to cache chunks of | 371 | * Initially we allow to use 16K of ram to cache chunks of |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff0dcc016b71..e687bb0dc73a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -382,7 +382,7 @@ static inline int inode_need_compress(struct inode *inode) | |||
382 | * are written in the same order that the flusher thread sent them | 382 | * are written in the same order that the flusher thread sent them |
383 | * down. | 383 | * down. |
384 | */ | 384 | */ |
385 | static noinline int compress_file_range(struct inode *inode, | 385 | static noinline void compress_file_range(struct inode *inode, |
386 | struct page *locked_page, | 386 | struct page *locked_page, |
387 | u64 start, u64 end, | 387 | u64 start, u64 end, |
388 | struct async_cow *async_cow, | 388 | struct async_cow *async_cow, |
@@ -411,14 +411,6 @@ static noinline int compress_file_range(struct inode *inode, | |||
411 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) | 411 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) |
412 | btrfs_add_inode_defrag(NULL, inode); | 412 | btrfs_add_inode_defrag(NULL, inode); |
413 | 413 | ||
414 | /* | ||
415 | * skip compression for a small file range(<=blocksize) that | ||
416 | * isn't an inline extent, since it dosen't save disk space at all. | ||
417 | */ | ||
418 | if ((end - start + 1) <= blocksize && | ||
419 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) | ||
420 | goto cleanup_and_bail_uncompressed; | ||
421 | |||
422 | actual_end = min_t(u64, isize, end + 1); | 414 | actual_end = min_t(u64, isize, end + 1); |
423 | again: | 415 | again: |
424 | will_compress = 0; | 416 | will_compress = 0; |
@@ -440,6 +432,14 @@ again: | |||
440 | 432 | ||
441 | total_compressed = actual_end - start; | 433 | total_compressed = actual_end - start; |
442 | 434 | ||
435 | /* | ||
436 | * skip compression for a small file range(<=blocksize) that | ||
437 | * isn't an inline extent, since it doesn't save disk space at all. | ||
438 | */ | ||
439 | if (total_compressed <= blocksize && | ||
440 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) | ||
441 | goto cleanup_and_bail_uncompressed; | ||
442 | |||
443 | /* we want to make sure that amount of ram required to uncompress | 443 | /* we want to make sure that amount of ram required to uncompress |
444 | * an extent is reasonable, so we limit the total size in ram | 444 | * an extent is reasonable, so we limit the total size in ram |
445 | * of a compressed extent to 128k. This is a crucial number | 445 | * of a compressed extent to 128k. This is a crucial number |
@@ -527,7 +527,10 @@ cont: | |||
527 | if (ret <= 0) { | 527 | if (ret <= 0) { |
528 | unsigned long clear_flags = EXTENT_DELALLOC | | 528 | unsigned long clear_flags = EXTENT_DELALLOC | |
529 | EXTENT_DEFRAG; | 529 | EXTENT_DEFRAG; |
530 | unsigned long page_error_op; | ||
531 | |||
530 | clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; | 532 | clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; |
533 | page_error_op = ret < 0 ? PAGE_SET_ERROR : 0; | ||
531 | 534 | ||
532 | /* | 535 | /* |
533 | * inline extent creation worked or returned error, | 536 | * inline extent creation worked or returned error, |
@@ -538,6 +541,7 @@ cont: | |||
538 | clear_flags, PAGE_UNLOCK | | 541 | clear_flags, PAGE_UNLOCK | |
539 | PAGE_CLEAR_DIRTY | | 542 | PAGE_CLEAR_DIRTY | |
540 | PAGE_SET_WRITEBACK | | 543 | PAGE_SET_WRITEBACK | |
544 | page_error_op | | ||
541 | PAGE_END_WRITEBACK); | 545 | PAGE_END_WRITEBACK); |
542 | goto free_pages_out; | 546 | goto free_pages_out; |
543 | } | 547 | } |
@@ -620,8 +624,7 @@ cleanup_and_bail_uncompressed: | |||
620 | *num_added += 1; | 624 | *num_added += 1; |
621 | } | 625 | } |
622 | 626 | ||
623 | out: | 627 | return; |
624 | return ret; | ||
625 | 628 | ||
626 | free_pages_out: | 629 | free_pages_out: |
627 | for (i = 0; i < nr_pages_ret; i++) { | 630 | for (i = 0; i < nr_pages_ret; i++) { |
@@ -629,8 +632,22 @@ free_pages_out: | |||
629 | page_cache_release(pages[i]); | 632 | page_cache_release(pages[i]); |
630 | } | 633 | } |
631 | kfree(pages); | 634 | kfree(pages); |
635 | } | ||
632 | 636 | ||
633 | goto out; | 637 | static void free_async_extent_pages(struct async_extent *async_extent) |
638 | { | ||
639 | int i; | ||
640 | |||
641 | if (!async_extent->pages) | ||
642 | return; | ||
643 | |||
644 | for (i = 0; i < async_extent->nr_pages; i++) { | ||
645 | WARN_ON(async_extent->pages[i]->mapping); | ||
646 | page_cache_release(async_extent->pages[i]); | ||
647 | } | ||
648 | kfree(async_extent->pages); | ||
649 | async_extent->nr_pages = 0; | ||
650 | async_extent->pages = NULL; | ||
634 | } | 651 | } |
635 | 652 | ||
636 | /* | 653 | /* |
@@ -639,7 +656,7 @@ free_pages_out: | |||
639 | * queued. We walk all the async extents created by compress_file_range | 656 | * queued. We walk all the async extents created by compress_file_range |
640 | * and send them down to the disk. | 657 | * and send them down to the disk. |
641 | */ | 658 | */ |
642 | static noinline int submit_compressed_extents(struct inode *inode, | 659 | static noinline void submit_compressed_extents(struct inode *inode, |
643 | struct async_cow *async_cow) | 660 | struct async_cow *async_cow) |
644 | { | 661 | { |
645 | struct async_extent *async_extent; | 662 | struct async_extent *async_extent; |
@@ -651,9 +668,6 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
651 | struct extent_io_tree *io_tree; | 668 | struct extent_io_tree *io_tree; |
652 | int ret = 0; | 669 | int ret = 0; |
653 | 670 | ||
654 | if (list_empty(&async_cow->extents)) | ||
655 | return 0; | ||
656 | |||
657 | again: | 671 | again: |
658 | while (!list_empty(&async_cow->extents)) { | 672 | while (!list_empty(&async_cow->extents)) { |
659 | async_extent = list_entry(async_cow->extents.next, | 673 | async_extent = list_entry(async_cow->extents.next, |
@@ -709,15 +723,7 @@ retry: | |||
709 | async_extent->compressed_size, | 723 | async_extent->compressed_size, |
710 | 0, alloc_hint, &ins, 1, 1); | 724 | 0, alloc_hint, &ins, 1, 1); |
711 | if (ret) { | 725 | if (ret) { |
712 | int i; | 726 | free_async_extent_pages(async_extent); |
713 | |||
714 | for (i = 0; i < async_extent->nr_pages; i++) { | ||
715 | WARN_ON(async_extent->pages[i]->mapping); | ||
716 | page_cache_release(async_extent->pages[i]); | ||
717 | } | ||
718 | kfree(async_extent->pages); | ||
719 | async_extent->nr_pages = 0; | ||
720 | async_extent->pages = NULL; | ||
721 | 727 | ||
722 | if (ret == -ENOSPC) { | 728 | if (ret == -ENOSPC) { |
723 | unlock_extent(io_tree, async_extent->start, | 729 | unlock_extent(io_tree, async_extent->start, |
@@ -814,15 +820,26 @@ retry: | |||
814 | ins.objectid, | 820 | ins.objectid, |
815 | ins.offset, async_extent->pages, | 821 | ins.offset, async_extent->pages, |
816 | async_extent->nr_pages); | 822 | async_extent->nr_pages); |
823 | if (ret) { | ||
824 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
825 | struct page *p = async_extent->pages[0]; | ||
826 | const u64 start = async_extent->start; | ||
827 | const u64 end = start + async_extent->ram_size - 1; | ||
828 | |||
829 | p->mapping = inode->i_mapping; | ||
830 | tree->ops->writepage_end_io_hook(p, start, end, | ||
831 | NULL, 0); | ||
832 | p->mapping = NULL; | ||
833 | extent_clear_unlock_delalloc(inode, start, end, NULL, 0, | ||
834 | PAGE_END_WRITEBACK | | ||
835 | PAGE_SET_ERROR); | ||
836 | free_async_extent_pages(async_extent); | ||
837 | } | ||
817 | alloc_hint = ins.objectid + ins.offset; | 838 | alloc_hint = ins.objectid + ins.offset; |
818 | kfree(async_extent); | 839 | kfree(async_extent); |
819 | if (ret) | ||
820 | goto out; | ||
821 | cond_resched(); | 840 | cond_resched(); |
822 | } | 841 | } |
823 | ret = 0; | 842 | return; |
824 | out: | ||
825 | return ret; | ||
826 | out_free_reserve: | 843 | out_free_reserve: |
827 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 844 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
828 | out_free: | 845 | out_free: |
@@ -832,7 +849,9 @@ out_free: | |||
832 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC | | 849 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC | |
833 | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, | 850 | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, |
834 | PAGE_UNLOCK | PAGE_CLEAR_DIRTY | | 851 | PAGE_UNLOCK | PAGE_CLEAR_DIRTY | |
835 | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); | 852 | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | |
853 | PAGE_SET_ERROR); | ||
854 | free_async_extent_pages(async_extent); | ||
836 | kfree(async_extent); | 855 | kfree(async_extent); |
837 | goto again; | 856 | goto again; |
838 | } | 857 | } |
@@ -1318,7 +1337,7 @@ next_slot: | |||
1318 | * we fall into common COW way. | 1337 | * we fall into common COW way. |
1319 | */ | 1338 | */ |
1320 | if (!nolock) { | 1339 | if (!nolock) { |
1321 | err = btrfs_start_nocow_write(root); | 1340 | err = btrfs_start_write_no_snapshoting(root); |
1322 | if (!err) | 1341 | if (!err) |
1323 | goto out_check; | 1342 | goto out_check; |
1324 | } | 1343 | } |
@@ -1342,7 +1361,7 @@ out_check: | |||
1342 | if (extent_end <= start) { | 1361 | if (extent_end <= start) { |
1343 | path->slots[0]++; | 1362 | path->slots[0]++; |
1344 | if (!nolock && nocow) | 1363 | if (!nolock && nocow) |
1345 | btrfs_end_nocow_write(root); | 1364 | btrfs_end_write_no_snapshoting(root); |
1346 | goto next_slot; | 1365 | goto next_slot; |
1347 | } | 1366 | } |
1348 | if (!nocow) { | 1367 | if (!nocow) { |
@@ -1362,7 +1381,7 @@ out_check: | |||
1362 | page_started, nr_written, 1); | 1381 | page_started, nr_written, 1); |
1363 | if (ret) { | 1382 | if (ret) { |
1364 | if (!nolock && nocow) | 1383 | if (!nolock && nocow) |
1365 | btrfs_end_nocow_write(root); | 1384 | btrfs_end_write_no_snapshoting(root); |
1366 | goto error; | 1385 | goto error; |
1367 | } | 1386 | } |
1368 | cow_start = (u64)-1; | 1387 | cow_start = (u64)-1; |
@@ -1413,7 +1432,7 @@ out_check: | |||
1413 | num_bytes); | 1432 | num_bytes); |
1414 | if (ret) { | 1433 | if (ret) { |
1415 | if (!nolock && nocow) | 1434 | if (!nolock && nocow) |
1416 | btrfs_end_nocow_write(root); | 1435 | btrfs_end_write_no_snapshoting(root); |
1417 | goto error; | 1436 | goto error; |
1418 | } | 1437 | } |
1419 | } | 1438 | } |
@@ -1424,7 +1443,7 @@ out_check: | |||
1424 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1443 | EXTENT_DELALLOC, PAGE_UNLOCK | |
1425 | PAGE_SET_PRIVATE2); | 1444 | PAGE_SET_PRIVATE2); |
1426 | if (!nolock && nocow) | 1445 | if (!nolock && nocow) |
1427 | btrfs_end_nocow_write(root); | 1446 | btrfs_end_write_no_snapshoting(root); |
1428 | cur_offset = extent_end; | 1447 | cur_offset = extent_end; |
1429 | if (cur_offset > end) | 1448 | if (cur_offset > end) |
1430 | break; | 1449 | break; |
@@ -4580,6 +4599,26 @@ next: | |||
4580 | return err; | 4599 | return err; |
4581 | } | 4600 | } |
4582 | 4601 | ||
4602 | static int wait_snapshoting_atomic_t(atomic_t *a) | ||
4603 | { | ||
4604 | schedule(); | ||
4605 | return 0; | ||
4606 | } | ||
4607 | |||
4608 | static void wait_for_snapshot_creation(struct btrfs_root *root) | ||
4609 | { | ||
4610 | while (true) { | ||
4611 | int ret; | ||
4612 | |||
4613 | ret = btrfs_start_write_no_snapshoting(root); | ||
4614 | if (ret) | ||
4615 | break; | ||
4616 | wait_on_atomic_t(&root->will_be_snapshoted, | ||
4617 | wait_snapshoting_atomic_t, | ||
4618 | TASK_UNINTERRUPTIBLE); | ||
4619 | } | ||
4620 | } | ||
4621 | |||
4583 | static int btrfs_setsize(struct inode *inode, struct iattr *attr) | 4622 | static int btrfs_setsize(struct inode *inode, struct iattr *attr) |
4584 | { | 4623 | { |
4585 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4624 | struct btrfs_root *root = BTRFS_I(inode)->root; |
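The wait_for_snapshot_creation() helper added above builds on the generic wait_on_atomic_t() facility from <linux/wait.h>: the action callback only needs to give up the CPU, and the wait completes once the atomic_t reaches zero. A minimal sketch of that idiom follows; the counter and function names are illustrative stand-ins (for root->will_be_snapshoted), not taken from this series.

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/wait.h>

static atomic_t example_busy_count = ATOMIC_INIT(0);	/* illustrative counter */

/* Action callback: just sleep; returning 0 keeps the wait going. */
static int example_wait_action(atomic_t *a)
{
	schedule();
	return 0;
}

static void example_wait_until_idle(void)
{
	/*
	 * Blocks until example_busy_count reaches 0. Whoever decrements
	 * the counter must call wake_up_atomic_t() on it, as create_snapshot()
	 * does further down in this diff.
	 */
	wait_on_atomic_t(&example_busy_count, example_wait_action,
			 TASK_UNINTERRUPTIBLE);
}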
@@ -4604,17 +4643,30 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4604 | 4643 | ||
4605 | if (newsize > oldsize) { | 4644 | if (newsize > oldsize) { |
4606 | truncate_pagecache(inode, newsize); | 4645 | truncate_pagecache(inode, newsize); |
4646 | /* | ||
4647 | * Don't do an expanding truncate while snapshotting is ongoing. | ||
4648 | * This is to ensure the snapshot captures a fully consistent | ||
4649 | * state of this file - if the snapshot captures this expanding | ||
4650 | * truncation, it must capture all writes that happened before | ||
4651 | * this truncation. | ||
4652 | */ | ||
4653 | wait_for_snapshot_creation(root); | ||
4607 | ret = btrfs_cont_expand(inode, oldsize, newsize); | 4654 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
4608 | if (ret) | 4655 | if (ret) { |
4656 | btrfs_end_write_no_snapshoting(root); | ||
4609 | return ret; | 4657 | return ret; |
4658 | } | ||
4610 | 4659 | ||
4611 | trans = btrfs_start_transaction(root, 1); | 4660 | trans = btrfs_start_transaction(root, 1); |
4612 | if (IS_ERR(trans)) | 4661 | if (IS_ERR(trans)) { |
4662 | btrfs_end_write_no_snapshoting(root); | ||
4613 | return PTR_ERR(trans); | 4663 | return PTR_ERR(trans); |
4664 | } | ||
4614 | 4665 | ||
4615 | i_size_write(inode, newsize); | 4666 | i_size_write(inode, newsize); |
4616 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | 4667 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
4617 | ret = btrfs_update_inode(trans, root, inode); | 4668 | ret = btrfs_update_inode(trans, root, inode); |
4669 | btrfs_end_write_no_snapshoting(root); | ||
4618 | btrfs_end_transaction(trans, root); | 4670 | btrfs_end_transaction(trans, root); |
4619 | } else { | 4671 | } else { |
4620 | 4672 | ||
@@ -7000,9 +7052,12 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
7000 | btrfs_put_ordered_extent(ordered); | 7052 | btrfs_put_ordered_extent(ordered); |
7001 | } else { | 7053 | } else { |
7002 | /* Screw you mmap */ | 7054 | /* Screw you mmap */ |
7003 | ret = filemap_write_and_wait_range(inode->i_mapping, | 7055 | ret = btrfs_fdatawrite_range(inode, lockstart, lockend); |
7004 | lockstart, | 7056 | if (ret) |
7005 | lockend); | 7057 | break; |
7058 | ret = filemap_fdatawait_range(inode->i_mapping, | ||
7059 | lockstart, | ||
7060 | lockend); | ||
7006 | if (ret) | 7061 | if (ret) |
7007 | break; | 7062 | break; |
7008 | 7063 | ||
@@ -9442,6 +9497,21 @@ out_inode: | |||
9442 | 9497 | ||
9443 | } | 9498 | } |
9444 | 9499 | ||
9500 | /* Inspired by filemap_check_errors() */ | ||
9501 | int btrfs_inode_check_errors(struct inode *inode) | ||
9502 | { | ||
9503 | int ret = 0; | ||
9504 | |||
9505 | if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) && | ||
9506 | test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags)) | ||
9507 | ret = -ENOSPC; | ||
9508 | if (test_bit(AS_EIO, &inode->i_mapping->flags) && | ||
9509 | test_and_clear_bit(AS_EIO, &inode->i_mapping->flags)) | ||
9510 | ret = -EIO; | ||
9511 | |||
9512 | return ret; | ||
9513 | } | ||
9514 | |||
9445 | static const struct inode_operations btrfs_dir_inode_operations = { | 9515 | static const struct inode_operations btrfs_dir_inode_operations = { |
9446 | .getattr = btrfs_getattr, | 9516 | .getattr = btrfs_getattr, |
9447 | .lookup = btrfs_lookup, | 9517 | .lookup = btrfs_lookup, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 080fe66c0349..d49fe8a0f6b5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -617,7 +617,7 @@ fail: | |||
617 | return ret; | 617 | return ret; |
618 | } | 618 | } |
619 | 619 | ||
620 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | 620 | static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) |
621 | { | 621 | { |
622 | s64 writers; | 622 | s64 writers; |
623 | DEFINE_WAIT(wait); | 623 | DEFINE_WAIT(wait); |
@@ -649,7 +649,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
649 | 649 | ||
650 | atomic_inc(&root->will_be_snapshoted); | 650 | atomic_inc(&root->will_be_snapshoted); |
651 | smp_mb__after_atomic(); | 651 | smp_mb__after_atomic(); |
652 | btrfs_wait_nocow_write(root); | 652 | btrfs_wait_for_no_snapshoting_writes(root); |
653 | 653 | ||
654 | ret = btrfs_start_delalloc_inodes(root, 0); | 654 | ret = btrfs_start_delalloc_inodes(root, 0); |
655 | if (ret) | 655 | if (ret) |
@@ -717,35 +717,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
717 | if (ret) | 717 | if (ret) |
718 | goto fail; | 718 | goto fail; |
719 | 719 | ||
720 | /* | ||
721 | * If orphan cleanup did remove any orphans, it means the tree was | ||
722 | * modified and therefore the commit root is not the same as the | ||
723 | * current root anymore. This is a problem, because send uses the | ||
724 | * commit root and therefore can see inode items that don't exist | ||
725 | * in the current root anymore, and for example make calls to | ||
726 | * btrfs_iget, which will do tree lookups based on the current root | ||
727 | * and not on the commit root. Those lookups will fail, returning a | ||
728 | * -ESTALE error, and making send fail with that error. So make sure | ||
729 | * a send does not see any orphans we have just removed, and that it | ||
730 | * will see the same inodes regardless of whether a transaction | ||
731 | * commit happened before it started (meaning that the commit root | ||
732 | * will be the same as the current root) or not. | ||
733 | */ | ||
734 | if (readonly && pending_snapshot->snap->node != | ||
735 | pending_snapshot->snap->commit_root) { | ||
736 | trans = btrfs_join_transaction(pending_snapshot->snap); | ||
737 | if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||
738 | ret = PTR_ERR(trans); | ||
739 | goto fail; | ||
740 | } | ||
741 | if (!IS_ERR(trans)) { | ||
742 | ret = btrfs_commit_transaction(trans, | ||
743 | pending_snapshot->snap); | ||
744 | if (ret) | ||
745 | goto fail; | ||
746 | } | ||
747 | } | ||
748 | |||
749 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 720 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
750 | if (IS_ERR(inode)) { | 721 | if (IS_ERR(inode)) { |
751 | ret = PTR_ERR(inode); | 722 | ret = PTR_ERR(inode); |
@@ -761,7 +732,8 @@ fail: | |||
761 | free: | 732 | free: |
762 | kfree(pending_snapshot); | 733 | kfree(pending_snapshot); |
763 | out: | 734 | out: |
764 | atomic_dec(&root->will_be_snapshoted); | 735 | if (atomic_dec_and_test(&root->will_be_snapshoted)) |
736 | wake_up_atomic_t(&root->will_be_snapshoted); | ||
765 | return ret; | 737 | return ret; |
766 | } | 738 | } |
767 | 739 | ||
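The other half of that handshake is the atomic_dec_and_test()/wake_up_atomic_t() pairing at the end of create_snapshot() above: only the transition to zero wakes the waiters, so tasks sleeping in wait_on_atomic_t() are not woken while other snapshots are still pending. A short sketch, reusing the illustrative counter from the earlier example:

#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t example_busy_count;	/* illustrative, shared with the waiter sketch */

static void example_mark_done(void)
{
	/* Wake wait_on_atomic_t() sleepers only on the final 1 -> 0 drop. */
	if (atomic_dec_and_test(&example_busy_count))
		wake_up_atomic_t(&example_busy_count);
}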
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ac734ec4cc20..534544e08f76 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | INIT_LIST_HEAD(&entry->work_list); | 220 | INIT_LIST_HEAD(&entry->work_list); |
221 | init_completion(&entry->completion); | 221 | init_completion(&entry->completion); |
222 | INIT_LIST_HEAD(&entry->log_list); | 222 | INIT_LIST_HEAD(&entry->log_list); |
223 | INIT_LIST_HEAD(&entry->trans_list); | ||
223 | 224 | ||
224 | trace_btrfs_ordered_extent_add(inode, entry); | 225 | trace_btrfs_ordered_extent_add(inode, entry); |
225 | 226 | ||
@@ -431,19 +432,31 @@ out: | |||
431 | 432 | ||
432 | /* Needs to either be called under a log transaction or the log_mutex */ | 433 | /* Needs to either be called under a log transaction or the log_mutex */ |
433 | void btrfs_get_logged_extents(struct inode *inode, | 434 | void btrfs_get_logged_extents(struct inode *inode, |
434 | struct list_head *logged_list) | 435 | struct list_head *logged_list, |
436 | const loff_t start, | ||
437 | const loff_t end) | ||
435 | { | 438 | { |
436 | struct btrfs_ordered_inode_tree *tree; | 439 | struct btrfs_ordered_inode_tree *tree; |
437 | struct btrfs_ordered_extent *ordered; | 440 | struct btrfs_ordered_extent *ordered; |
438 | struct rb_node *n; | 441 | struct rb_node *n; |
442 | struct rb_node *prev; | ||
439 | 443 | ||
440 | tree = &BTRFS_I(inode)->ordered_tree; | 444 | tree = &BTRFS_I(inode)->ordered_tree; |
441 | spin_lock_irq(&tree->lock); | 445 | spin_lock_irq(&tree->lock); |
442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 446 | n = __tree_search(&tree->tree, end, &prev); |
447 | if (!n) | ||
448 | n = prev; | ||
449 | for (; n; n = rb_prev(n)) { | ||
443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 450 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
451 | if (ordered->file_offset > end) | ||
452 | continue; | ||
453 | if (entry_end(ordered) <= start) | ||
454 | break; | ||
444 | if (!list_empty(&ordered->log_list)) | 455 | if (!list_empty(&ordered->log_list)) |
445 | continue; | 456 | continue; |
446 | list_add_tail(&ordered->log_list, logged_list); | 457 | if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) |
458 | continue; | ||
459 | list_add(&ordered->log_list, logged_list); | ||
447 | atomic_inc(&ordered->refs); | 460 | atomic_inc(&ordered->refs); |
448 | } | 461 | } |
449 | spin_unlock_irq(&tree->lock); | 462 | spin_unlock_irq(&tree->lock); |
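For reference, the new range-bounded walk in btrfs_get_logged_extents() follows a common rbtree pattern: search for the right-most entry at or before 'end', then step left with rb_prev() until an entry that ends at or before 'start' is reached. The sketch below uses made-up types and a hypothetical search helper; only the walk structure mirrors the hunk above.

#include <linux/rbtree.h>
#include <linux/types.h>

struct example_range {
	struct rb_node rb_node;
	u64 start;	/* plays the role of file_offset above */
	u64 len;
};

/* Hypothetical helper: returns the node covering 'offset', or NULL and sets *prev. */
struct rb_node *example_tree_search(struct rb_root *root, u64 offset,
				    struct rb_node **prev);

static void example_walk_range(struct rb_root *root, u64 start, u64 end)
{
	struct rb_node *prev;
	struct rb_node *n = example_tree_search(root, end, &prev);

	if (!n)
		n = prev;
	for (; n; n = rb_prev(n)) {
		struct example_range *e = rb_entry(n, struct example_range,
						   rb_node);

		if (e->start > end)		/* still right of the range */
			continue;
		if (e->start + e->len <= start)	/* left of the range: done */
			break;
		/* ... e overlaps [start, end]; collect it here ... */
	}
}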
@@ -472,7 +485,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list, | |||
472 | spin_unlock_irq(&log->log_extents_lock[index]); | 485 | spin_unlock_irq(&log->log_extents_lock[index]); |
473 | } | 486 | } |
474 | 487 | ||
475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 488 | void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, |
489 | struct btrfs_root *log, u64 transid) | ||
476 | { | 490 | { |
477 | struct btrfs_ordered_extent *ordered; | 491 | struct btrfs_ordered_extent *ordered; |
478 | int index = transid % 2; | 492 | int index = transid % 2; |
@@ -497,7 +511,8 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | |||
497 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, | 511 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, |
498 | &ordered->flags)); | 512 | &ordered->flags)); |
499 | 513 | ||
500 | btrfs_put_ordered_extent(ordered); | 514 | if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) |
515 | list_add_tail(&ordered->trans_list, &trans->ordered); | ||
501 | spin_lock_irq(&log->log_extents_lock[index]); | 516 | spin_lock_irq(&log->log_extents_lock[index]); |
502 | } | 517 | } |
503 | spin_unlock_irq(&log->log_extents_lock[index]); | 518 | spin_unlock_irq(&log->log_extents_lock[index]); |
@@ -725,30 +740,10 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
725 | /* start IO across the range first to instantiate any delalloc | 740 | /* start IO across the range first to instantiate any delalloc |
726 | * extents | 741 | * extents |
727 | */ | 742 | */ |
728 | ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | 743 | ret = btrfs_fdatawrite_range(inode, start, orig_end); |
729 | if (ret) | 744 | if (ret) |
730 | return ret; | 745 | return ret; |
731 | /* | 746 | |
732 | * So with compression we will find and lock a dirty page and clear the | ||
733 | * first one as dirty, setup an async extent, and immediately return | ||
734 | * with the entire range locked but with nobody actually marked with | ||
735 | * writeback. So we can't just filemap_write_and_wait_range() and | ||
736 | * expect it to work since it will just kick off a thread to do the | ||
737 | * actual work. So we need to call filemap_fdatawrite_range _again_ | ||
738 | * since it will wait on the page lock, which won't be unlocked until | ||
739 | * after the pages have been marked as writeback and so we're good to go | ||
740 | * from there. We have to do this otherwise we'll miss the ordered | ||
741 | * extents and that results in badness. Please Josef, do not think you | ||
742 | * know better and pull this out at some point in the future, it is | ||
743 | * right and you are wrong. | ||
744 | */ | ||
745 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
746 | &BTRFS_I(inode)->runtime_flags)) { | ||
747 | ret = filemap_fdatawrite_range(inode->i_mapping, start, | ||
748 | orig_end); | ||
749 | if (ret) | ||
750 | return ret; | ||
751 | } | ||
752 | ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); | 747 | ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
753 | if (ret) | 748 | if (ret) |
754 | return ret; | 749 | return ret; |
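btrfs_fdatawrite_range() itself is not part of this hunk; it replaces the open-coded double filemap_fdatawrite_range() call whose rationale the removed comment spelled out (compressed writeback may leave pages locked but not yet marked writeback after the first pass). Assuming the helper simply wraps that logic, a plausible sketch looks like the following; the real helper is defined elsewhere in the series and may differ.

#include <linux/bitops.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include "btrfs_inode.h"	/* for BTRFS_I() and the runtime flag */

static int example_fdatawrite_range(struct inode *inode, loff_t start,
				    loff_t end)
{
	int ret;

	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		/*
		 * Second pass: this waits on the page locks held by the async
		 * compression path, which are only released once the pages
		 * have been marked writeback.
		 */
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}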
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index d81a274d621e..e96cd4ccd805 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -71,6 +71,8 @@ struct btrfs_ordered_sum { | |||
71 | ordered extent */ | 71 | ordered extent */ |
72 | #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ | 72 | #define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ |
73 | 73 | ||
74 | #define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent | ||
75 | * in the logging code. */ | ||
74 | struct btrfs_ordered_extent { | 76 | struct btrfs_ordered_extent { |
75 | /* logical offset in the file */ | 77 | /* logical offset in the file */ |
76 | u64 file_offset; | 78 | u64 file_offset; |
@@ -121,6 +123,9 @@ struct btrfs_ordered_extent { | |||
121 | /* If we need to wait on this to be done */ | 123 | /* If we need to wait on this to be done */ |
122 | struct list_head log_list; | 124 | struct list_head log_list; |
123 | 125 | ||
126 | /* If the transaction needs to wait on this ordered extent */ | ||
127 | struct list_head trans_list; | ||
128 | |||
124 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ | 129 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ |
125 | wait_queue_head_t wait; | 130 | wait_queue_head_t wait; |
126 | 131 | ||
@@ -193,11 +198,14 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
193 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
194 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
195 | void btrfs_get_logged_extents(struct inode *inode, | 200 | void btrfs_get_logged_extents(struct inode *inode, |
196 | struct list_head *logged_list); | 201 | struct list_head *logged_list, |
202 | const loff_t start, | ||
203 | const loff_t end); | ||
197 | void btrfs_put_logged_extents(struct list_head *logged_list); | 204 | void btrfs_put_logged_extents(struct list_head *logged_list); |
198 | void btrfs_submit_logged_extents(struct list_head *logged_list, | 205 | void btrfs_submit_logged_extents(struct list_head *logged_list, |
199 | struct btrfs_root *log); | 206 | struct btrfs_root *log); |
200 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 207 | void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, |
208 | struct btrfs_root *log, u64 transid); | ||
201 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 209 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
202 | int __init ordered_data_init(void); | 210 | int __init ordered_data_init(void); |
203 | void ordered_data_exit(void); | 211 | void ordered_data_exit(void); |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 6a41631cb959..8ab2a17bbba8 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -58,9 +58,23 @@ | |||
58 | */ | 58 | */ |
59 | #define RBIO_CACHE_READY_BIT 3 | 59 | #define RBIO_CACHE_READY_BIT 3 |
60 | 60 | ||
61 | /* | ||
62 | * bbio and raid_map are managed by the caller, so we shouldn't free | ||
63 | * them here. Besides that, rbios with this flag must not be | ||
64 | * cached, because we need raid_map to check whether two rbios belong | ||
65 | * to the same stripe, and it is very likely that the caller has | ||
66 | * already freed raid_map, so don't cache those rbios. | ||
67 | */ | ||
68 | #define RBIO_HOLD_BBIO_MAP_BIT 4 | ||
61 | 69 | ||
62 | #define RBIO_CACHE_SIZE 1024 | 70 | #define RBIO_CACHE_SIZE 1024 |
63 | 71 | ||
72 | enum btrfs_rbio_ops { | ||
73 | BTRFS_RBIO_WRITE = 0, | ||
74 | BTRFS_RBIO_READ_REBUILD = 1, | ||
75 | BTRFS_RBIO_PARITY_SCRUB = 2, | ||
76 | }; | ||
77 | |||
64 | struct btrfs_raid_bio { | 78 | struct btrfs_raid_bio { |
65 | struct btrfs_fs_info *fs_info; | 79 | struct btrfs_fs_info *fs_info; |
66 | struct btrfs_bio *bbio; | 80 | struct btrfs_bio *bbio; |
@@ -117,13 +131,16 @@ struct btrfs_raid_bio { | |||
117 | /* number of data stripes (no p/q) */ | 131 | /* number of data stripes (no p/q) */ |
118 | int nr_data; | 132 | int nr_data; |
119 | 133 | ||
134 | int real_stripes; | ||
135 | |||
136 | int stripe_npages; | ||
120 | /* | 137 | /* |
121 | * set if we're doing a parity rebuild | 138 | * set if we're doing a parity rebuild |
122 | * for a read from higher up, which is handled | 139 | * for a read from higher up, which is handled |
123 | * differently from a parity rebuild as part of | 140 | * differently from a parity rebuild as part of |
124 | * rmw | 141 | * rmw |
125 | */ | 142 | */ |
126 | int read_rebuild; | 143 | enum btrfs_rbio_ops operation; |
127 | 144 | ||
128 | /* first bad stripe */ | 145 | /* first bad stripe */ |
129 | int faila; | 146 | int faila; |
@@ -131,6 +148,7 @@ struct btrfs_raid_bio { | |||
131 | /* second bad stripe (for raid6 use) */ | 148 | /* second bad stripe (for raid6 use) */ |
132 | int failb; | 149 | int failb; |
133 | 150 | ||
151 | int scrubp; | ||
134 | /* | 152 | /* |
135 | * number of pages needed to represent the full | 153 | * number of pages needed to represent the full |
136 | * stripe | 154 | * stripe |
@@ -144,8 +162,13 @@ struct btrfs_raid_bio { | |||
144 | */ | 162 | */ |
145 | int bio_list_bytes; | 163 | int bio_list_bytes; |
146 | 164 | ||
165 | int generic_bio_cnt; | ||
166 | |||
147 | atomic_t refs; | 167 | atomic_t refs; |
148 | 168 | ||
169 | atomic_t stripes_pending; | ||
170 | |||
171 | atomic_t error; | ||
149 | /* | 172 | /* |
150 | * these are two arrays of pointers. We allocate the | 173 | * these are two arrays of pointers. We allocate the |
151 | * rbio big enough to hold them both and setup their | 174 | * rbio big enough to hold them both and setup their |
@@ -162,6 +185,11 @@ struct btrfs_raid_bio { | |||
162 | * here for faster lookup | 185 | * here for faster lookup |
163 | */ | 186 | */ |
164 | struct page **bio_pages; | 187 | struct page **bio_pages; |
188 | |||
189 | /* | ||
190 | * bitmap to record which horizontal stripe has data | ||
191 | */ | ||
192 | unsigned long *dbitmap; | ||
165 | }; | 193 | }; |
166 | 194 | ||
167 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); | 195 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); |
@@ -176,6 +204,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio); | |||
176 | static void index_rbio_pages(struct btrfs_raid_bio *rbio); | 204 | static void index_rbio_pages(struct btrfs_raid_bio *rbio); |
177 | static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); | 205 | static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); |
178 | 206 | ||
207 | static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | ||
208 | int need_check); | ||
209 | static void async_scrub_parity(struct btrfs_raid_bio *rbio); | ||
210 | |||
179 | /* | 211 | /* |
180 | * the stripe hash table is used for locking, and to collect | 212 | * the stripe hash table is used for locking, and to collect |
181 | * bios in hopes of making a full stripe | 213 | * bios in hopes of making a full stripe |
@@ -324,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest, | |||
324 | { | 356 | { |
325 | bio_list_merge(&dest->bio_list, &victim->bio_list); | 357 | bio_list_merge(&dest->bio_list, &victim->bio_list); |
326 | dest->bio_list_bytes += victim->bio_list_bytes; | 358 | dest->bio_list_bytes += victim->bio_list_bytes; |
359 | dest->generic_bio_cnt += victim->generic_bio_cnt; | ||
327 | bio_list_init(&victim->bio_list); | 360 | bio_list_init(&victim->bio_list); |
328 | } | 361 | } |
329 | 362 | ||
@@ -577,11 +610,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, | |||
577 | cur->raid_map[0]) | 610 | cur->raid_map[0]) |
578 | return 0; | 611 | return 0; |
579 | 612 | ||
580 | /* reads can't merge with writes */ | 613 | /* we can't merge with different operations */ |
581 | if (last->read_rebuild != | 614 | if (last->operation != cur->operation) |
582 | cur->read_rebuild) { | 615 | return 0; |
616 | /* | ||
617 | * A parity scrub needs to read the full stripe from the drive, | ||
618 | * check and repair the parity and write the new results. | ||
619 | * | ||
620 | * We're not allowed to add any new bios to the | ||
621 | * bio list here; anyone else that wants to | ||
622 | * change this stripe needs to do their own rmw. | ||
623 | */ | ||
624 | if (last->operation == BTRFS_RBIO_PARITY_SCRUB || | ||
625 | cur->operation == BTRFS_RBIO_PARITY_SCRUB) | ||
583 | return 0; | 626 | return 0; |
584 | } | ||
585 | 627 | ||
586 | return 1; | 628 | return 1; |
587 | } | 629 | } |
@@ -601,7 +643,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index) | |||
601 | */ | 643 | */ |
602 | static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) | 644 | static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) |
603 | { | 645 | { |
604 | if (rbio->nr_data + 1 == rbio->bbio->num_stripes) | 646 | if (rbio->nr_data + 1 == rbio->real_stripes) |
605 | return NULL; | 647 | return NULL; |
606 | 648 | ||
607 | index += ((rbio->nr_data + 1) * rbio->stripe_len) >> | 649 | index += ((rbio->nr_data + 1) * rbio->stripe_len) >> |
@@ -772,11 +814,14 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) | |||
772 | spin_unlock(&rbio->bio_list_lock); | 814 | spin_unlock(&rbio->bio_list_lock); |
773 | spin_unlock_irqrestore(&h->lock, flags); | 815 | spin_unlock_irqrestore(&h->lock, flags); |
774 | 816 | ||
775 | if (next->read_rebuild) | 817 | if (next->operation == BTRFS_RBIO_READ_REBUILD) |
776 | async_read_rebuild(next); | 818 | async_read_rebuild(next); |
777 | else { | 819 | else if (next->operation == BTRFS_RBIO_WRITE) { |
778 | steal_rbio(rbio, next); | 820 | steal_rbio(rbio, next); |
779 | async_rmw_stripe(next); | 821 | async_rmw_stripe(next); |
822 | } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { | ||
823 | steal_rbio(rbio, next); | ||
824 | async_scrub_parity(next); | ||
780 | } | 825 | } |
781 | 826 | ||
782 | goto done_nolock; | 827 | goto done_nolock; |
@@ -796,6 +841,21 @@ done_nolock: | |||
796 | remove_rbio_from_cache(rbio); | 841 | remove_rbio_from_cache(rbio); |
797 | } | 842 | } |
798 | 843 | ||
844 | static inline void | ||
845 | __free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need) | ||
846 | { | ||
847 | if (need) { | ||
848 | kfree(raid_map); | ||
849 | kfree(bbio); | ||
850 | } | ||
851 | } | ||
852 | |||
853 | static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio) | ||
854 | { | ||
855 | __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map, | ||
856 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)); | ||
857 | } | ||
858 | |||
799 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) | 859 | static void __free_raid_bio(struct btrfs_raid_bio *rbio) |
800 | { | 860 | { |
801 | int i; | 861 | int i; |
@@ -814,8 +874,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) | |||
814 | rbio->stripe_pages[i] = NULL; | 874 | rbio->stripe_pages[i] = NULL; |
815 | } | 875 | } |
816 | } | 876 | } |
817 | kfree(rbio->raid_map); | 877 | |
818 | kfree(rbio->bbio); | 878 | free_bbio_and_raid_map(rbio); |
879 | |||
819 | kfree(rbio); | 880 | kfree(rbio); |
820 | } | 881 | } |
821 | 882 | ||
@@ -833,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) | |||
833 | { | 894 | { |
834 | struct bio *cur = bio_list_get(&rbio->bio_list); | 895 | struct bio *cur = bio_list_get(&rbio->bio_list); |
835 | struct bio *next; | 896 | struct bio *next; |
897 | |||
898 | if (rbio->generic_bio_cnt) | ||
899 | btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); | ||
900 | |||
836 | free_raid_bio(rbio); | 901 | free_raid_bio(rbio); |
837 | 902 | ||
838 | while (cur) { | 903 | while (cur) { |
@@ -858,13 +923,13 @@ static void raid_write_end_io(struct bio *bio, int err) | |||
858 | 923 | ||
859 | bio_put(bio); | 924 | bio_put(bio); |
860 | 925 | ||
861 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 926 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
862 | return; | 927 | return; |
863 | 928 | ||
864 | err = 0; | 929 | err = 0; |
865 | 930 | ||
866 | /* OK, we have read all the stripes we need to. */ | 931 | /* OK, we have read all the stripes we need to. */ |
867 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 932 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
868 | err = -EIO; | 933 | err = -EIO; |
869 | 934 | ||
870 | rbio_orig_end_io(rbio, err, 0); | 935 | rbio_orig_end_io(rbio, err, 0); |
@@ -925,16 +990,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
925 | { | 990 | { |
926 | struct btrfs_raid_bio *rbio; | 991 | struct btrfs_raid_bio *rbio; |
927 | int nr_data = 0; | 992 | int nr_data = 0; |
928 | int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); | 993 | int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; |
994 | int num_pages = rbio_nr_pages(stripe_len, real_stripes); | ||
995 | int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); | ||
929 | void *p; | 996 | void *p; |
930 | 997 | ||
931 | rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, | 998 | rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 + |
999 | DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8), | ||
932 | GFP_NOFS); | 1000 | GFP_NOFS); |
933 | if (!rbio) { | 1001 | if (!rbio) |
934 | kfree(raid_map); | ||
935 | kfree(bbio); | ||
936 | return ERR_PTR(-ENOMEM); | 1002 | return ERR_PTR(-ENOMEM); |
937 | } | ||
938 | 1003 | ||
939 | bio_list_init(&rbio->bio_list); | 1004 | bio_list_init(&rbio->bio_list); |
940 | INIT_LIST_HEAD(&rbio->plug_list); | 1005 | INIT_LIST_HEAD(&rbio->plug_list); |
@@ -946,9 +1011,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
946 | rbio->fs_info = root->fs_info; | 1011 | rbio->fs_info = root->fs_info; |
947 | rbio->stripe_len = stripe_len; | 1012 | rbio->stripe_len = stripe_len; |
948 | rbio->nr_pages = num_pages; | 1013 | rbio->nr_pages = num_pages; |
1014 | rbio->real_stripes = real_stripes; | ||
1015 | rbio->stripe_npages = stripe_npages; | ||
949 | rbio->faila = -1; | 1016 | rbio->faila = -1; |
950 | rbio->failb = -1; | 1017 | rbio->failb = -1; |
951 | atomic_set(&rbio->refs, 1); | 1018 | atomic_set(&rbio->refs, 1); |
1019 | atomic_set(&rbio->error, 0); | ||
1020 | atomic_set(&rbio->stripes_pending, 0); | ||
952 | 1021 | ||
953 | /* | 1022 | /* |
954 | * the stripe_pages and bio_pages array point to the extra | 1023 | * the stripe_pages and bio_pages array point to the extra |
@@ -957,11 +1026,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, | |||
957 | p = rbio + 1; | 1026 | p = rbio + 1; |
958 | rbio->stripe_pages = p; | 1027 | rbio->stripe_pages = p; |
959 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; | 1028 | rbio->bio_pages = p + sizeof(struct page *) * num_pages; |
1029 | rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; | ||
960 | 1030 | ||
961 | if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) | 1031 | if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) |
962 | nr_data = bbio->num_stripes - 2; | 1032 | nr_data = real_stripes - 2; |
963 | else | 1033 | else |
964 | nr_data = bbio->num_stripes - 1; | 1034 | nr_data = real_stripes - 1; |
965 | 1035 | ||
966 | rbio->nr_data = nr_data; | 1036 | rbio->nr_data = nr_data; |
967 | return rbio; | 1037 | return rbio; |
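The allocation above packs both page-pointer arrays and the new dbitmap into the same kzalloc() as the rbio itself, then carves the pieces out with pointer arithmetic starting at 'rbio + 1'. A stripped-down sketch of that single-allocation layout (field names and the bitmap size parameter are illustrative, not the btrfs ones):

#include <linux/slab.h>

struct example_rbio {
	int nr_pages;
	struct page **stripe_pages;	/* all three point into the same allocation */
	struct page **bio_pages;
	unsigned long *dbitmap;
};

static struct example_rbio *example_alloc(int num_pages, size_t bitmap_bytes)
{
	struct example_rbio *rbio;
	void *p;

	rbio = kzalloc(sizeof(*rbio) +
		       2 * num_pages * sizeof(struct page *) + bitmap_bytes,
		       GFP_NOFS);
	if (!rbio)
		return NULL;

	p = rbio + 1;			/* first byte past the struct itself */
	rbio->stripe_pages = p;
	rbio->bio_pages = p + num_pages * sizeof(struct page *);
	rbio->dbitmap = p + 2 * num_pages * sizeof(struct page *);
	rbio->nr_pages = num_pages;

	return rbio;
}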
@@ -1073,7 +1143,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, | |||
1073 | static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) | 1143 | static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) |
1074 | { | 1144 | { |
1075 | if (rbio->faila >= 0 || rbio->failb >= 0) { | 1145 | if (rbio->faila >= 0 || rbio->failb >= 0) { |
1076 | BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1); | 1146 | BUG_ON(rbio->faila == rbio->real_stripes - 1); |
1077 | __raid56_parity_recover(rbio); | 1147 | __raid56_parity_recover(rbio); |
1078 | } else { | 1148 | } else { |
1079 | finish_rmw(rbio); | 1149 | finish_rmw(rbio); |
@@ -1134,7 +1204,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) | |||
1134 | static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | 1204 | static noinline void finish_rmw(struct btrfs_raid_bio *rbio) |
1135 | { | 1205 | { |
1136 | struct btrfs_bio *bbio = rbio->bbio; | 1206 | struct btrfs_bio *bbio = rbio->bbio; |
1137 | void *pointers[bbio->num_stripes]; | 1207 | void *pointers[rbio->real_stripes]; |
1138 | int stripe_len = rbio->stripe_len; | 1208 | int stripe_len = rbio->stripe_len; |
1139 | int nr_data = rbio->nr_data; | 1209 | int nr_data = rbio->nr_data; |
1140 | int stripe; | 1210 | int stripe; |
@@ -1148,11 +1218,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1148 | 1218 | ||
1149 | bio_list_init(&bio_list); | 1219 | bio_list_init(&bio_list); |
1150 | 1220 | ||
1151 | if (bbio->num_stripes - rbio->nr_data == 1) { | 1221 | if (rbio->real_stripes - rbio->nr_data == 1) { |
1152 | p_stripe = bbio->num_stripes - 1; | 1222 | p_stripe = rbio->real_stripes - 1; |
1153 | } else if (bbio->num_stripes - rbio->nr_data == 2) { | 1223 | } else if (rbio->real_stripes - rbio->nr_data == 2) { |
1154 | p_stripe = bbio->num_stripes - 2; | 1224 | p_stripe = rbio->real_stripes - 2; |
1155 | q_stripe = bbio->num_stripes - 1; | 1225 | q_stripe = rbio->real_stripes - 1; |
1156 | } else { | 1226 | } else { |
1157 | BUG(); | 1227 | BUG(); |
1158 | } | 1228 | } |
@@ -1169,7 +1239,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1169 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); | 1239 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); |
1170 | spin_unlock_irq(&rbio->bio_list_lock); | 1240 | spin_unlock_irq(&rbio->bio_list_lock); |
1171 | 1241 | ||
1172 | atomic_set(&rbio->bbio->error, 0); | 1242 | atomic_set(&rbio->error, 0); |
1173 | 1243 | ||
1174 | /* | 1244 | /* |
1175 | * now that we've set rmw_locked, run through the | 1245 | * now that we've set rmw_locked, run through the |
@@ -1209,7 +1279,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1209 | SetPageUptodate(p); | 1279 | SetPageUptodate(p); |
1210 | pointers[stripe++] = kmap(p); | 1280 | pointers[stripe++] = kmap(p); |
1211 | 1281 | ||
1212 | raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, | 1282 | raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, |
1213 | pointers); | 1283 | pointers); |
1214 | } else { | 1284 | } else { |
1215 | /* raid5 */ | 1285 | /* raid5 */ |
@@ -1218,7 +1288,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1218 | } | 1288 | } |
1219 | 1289 | ||
1220 | 1290 | ||
1221 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) | 1291 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) |
1222 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | 1292 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); |
1223 | } | 1293 | } |
1224 | 1294 | ||
@@ -1227,7 +1297,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1227 | * higher layers (the bio_list in our rbio) and our p/q. Ignore | 1297 | * higher layers (the bio_list in our rbio) and our p/q. Ignore |
1228 | * everything else. | 1298 | * everything else. |
1229 | */ | 1299 | */ |
1230 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { | 1300 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1231 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { | 1301 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { |
1232 | struct page *page; | 1302 | struct page *page; |
1233 | if (stripe < rbio->nr_data) { | 1303 | if (stripe < rbio->nr_data) { |
@@ -1245,8 +1315,34 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) | |||
1245 | } | 1315 | } |
1246 | } | 1316 | } |
1247 | 1317 | ||
1248 | atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list)); | 1318 | if (likely(!bbio->num_tgtdevs)) |
1249 | BUG_ON(atomic_read(&bbio->stripes_pending) == 0); | 1319 | goto write_data; |
1320 | |||
1321 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { | ||
1322 | if (!bbio->tgtdev_map[stripe]) | ||
1323 | continue; | ||
1324 | |||
1325 | for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { | ||
1326 | struct page *page; | ||
1327 | if (stripe < rbio->nr_data) { | ||
1328 | page = page_in_rbio(rbio, stripe, pagenr, 1); | ||
1329 | if (!page) | ||
1330 | continue; | ||
1331 | } else { | ||
1332 | page = rbio_stripe_page(rbio, stripe, pagenr); | ||
1333 | } | ||
1334 | |||
1335 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
1336 | rbio->bbio->tgtdev_map[stripe], | ||
1337 | pagenr, rbio->stripe_len); | ||
1338 | if (ret) | ||
1339 | goto cleanup; | ||
1340 | } | ||
1341 | } | ||
1342 | |||
1343 | write_data: | ||
1344 | atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); | ||
1345 | BUG_ON(atomic_read(&rbio->stripes_pending) == 0); | ||
1250 | 1346 | ||
1251 | while (1) { | 1347 | while (1) { |
1252 | bio = bio_list_pop(&bio_list); | 1348 | bio = bio_list_pop(&bio_list); |
@@ -1283,7 +1379,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, | |||
1283 | stripe = &rbio->bbio->stripes[i]; | 1379 | stripe = &rbio->bbio->stripes[i]; |
1284 | stripe_start = stripe->physical; | 1380 | stripe_start = stripe->physical; |
1285 | if (physical >= stripe_start && | 1381 | if (physical >= stripe_start && |
1286 | physical < stripe_start + rbio->stripe_len) { | 1382 | physical < stripe_start + rbio->stripe_len && |
1383 | bio->bi_bdev == stripe->dev->bdev) { | ||
1287 | return i; | 1384 | return i; |
1288 | } | 1385 | } |
1289 | } | 1386 | } |
@@ -1331,11 +1428,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) | |||
1331 | if (rbio->faila == -1) { | 1428 | if (rbio->faila == -1) { |
1332 | /* first failure on this rbio */ | 1429 | /* first failure on this rbio */ |
1333 | rbio->faila = failed; | 1430 | rbio->faila = failed; |
1334 | atomic_inc(&rbio->bbio->error); | 1431 | atomic_inc(&rbio->error); |
1335 | } else if (rbio->failb == -1) { | 1432 | } else if (rbio->failb == -1) { |
1336 | /* second failure on this rbio */ | 1433 | /* second failure on this rbio */ |
1337 | rbio->failb = failed; | 1434 | rbio->failb = failed; |
1338 | atomic_inc(&rbio->bbio->error); | 1435 | atomic_inc(&rbio->error); |
1339 | } else { | 1436 | } else { |
1340 | ret = -EIO; | 1437 | ret = -EIO; |
1341 | } | 1438 | } |
@@ -1394,11 +1491,11 @@ static void raid_rmw_end_io(struct bio *bio, int err) | |||
1394 | 1491 | ||
1395 | bio_put(bio); | 1492 | bio_put(bio); |
1396 | 1493 | ||
1397 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 1494 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
1398 | return; | 1495 | return; |
1399 | 1496 | ||
1400 | err = 0; | 1497 | err = 0; |
1401 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 1498 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
1402 | goto cleanup; | 1499 | goto cleanup; |
1403 | 1500 | ||
1404 | /* | 1501 | /* |
@@ -1439,7 +1536,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio) | |||
1439 | static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | 1536 | static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) |
1440 | { | 1537 | { |
1441 | int bios_to_read = 0; | 1538 | int bios_to_read = 0; |
1442 | struct btrfs_bio *bbio = rbio->bbio; | ||
1443 | struct bio_list bio_list; | 1539 | struct bio_list bio_list; |
1444 | int ret; | 1540 | int ret; |
1445 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); | 1541 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
@@ -1455,7 +1551,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1455 | 1551 | ||
1456 | index_rbio_pages(rbio); | 1552 | index_rbio_pages(rbio); |
1457 | 1553 | ||
1458 | atomic_set(&rbio->bbio->error, 0); | 1554 | atomic_set(&rbio->error, 0); |
1459 | /* | 1555 | /* |
1460 | * build a list of bios to read all the missing parts of this | 1556 | * build a list of bios to read all the missing parts of this |
1461 | * stripe | 1557 | * stripe |
@@ -1503,7 +1599,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1503 | * the bbio may be freed once we submit the last bio. Make sure | 1599 | * the bbio may be freed once we submit the last bio. Make sure |
1504 | * not to touch it after that | 1600 | * not to touch it after that |
1505 | */ | 1601 | */ |
1506 | atomic_set(&bbio->stripes_pending, bios_to_read); | 1602 | atomic_set(&rbio->stripes_pending, bios_to_read); |
1507 | while (1) { | 1603 | while (1) { |
1508 | bio = bio_list_pop(&bio_list); | 1604 | bio = bio_list_pop(&bio_list); |
1509 | if (!bio) | 1605 | if (!bio) |
@@ -1686,19 +1782,30 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | |||
1686 | struct btrfs_raid_bio *rbio; | 1782 | struct btrfs_raid_bio *rbio; |
1687 | struct btrfs_plug_cb *plug = NULL; | 1783 | struct btrfs_plug_cb *plug = NULL; |
1688 | struct blk_plug_cb *cb; | 1784 | struct blk_plug_cb *cb; |
1785 | int ret; | ||
1689 | 1786 | ||
1690 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 1787 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); |
1691 | if (IS_ERR(rbio)) | 1788 | if (IS_ERR(rbio)) { |
1789 | __free_bbio_and_raid_map(bbio, raid_map, 1); | ||
1692 | return PTR_ERR(rbio); | 1790 | return PTR_ERR(rbio); |
1791 | } | ||
1693 | bio_list_add(&rbio->bio_list, bio); | 1792 | bio_list_add(&rbio->bio_list, bio); |
1694 | rbio->bio_list_bytes = bio->bi_iter.bi_size; | 1793 | rbio->bio_list_bytes = bio->bi_iter.bi_size; |
1794 | rbio->operation = BTRFS_RBIO_WRITE; | ||
1795 | |||
1796 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
1797 | rbio->generic_bio_cnt = 1; | ||
1695 | 1798 | ||
1696 | /* | 1799 | /* |
1697 | * don't plug on full rbios, just get them out the door | 1800 | * don't plug on full rbios, just get them out the door |
1698 | * as quickly as we can | 1801 | * as quickly as we can |
1699 | */ | 1802 | */ |
1700 | if (rbio_is_full(rbio)) | 1803 | if (rbio_is_full(rbio)) { |
1701 | return full_stripe_write(rbio); | 1804 | ret = full_stripe_write(rbio); |
1805 | if (ret) | ||
1806 | btrfs_bio_counter_dec(root->fs_info); | ||
1807 | return ret; | ||
1808 | } | ||
1702 | 1809 | ||
1703 | cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, | 1810 | cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, |
1704 | sizeof(*plug)); | 1811 | sizeof(*plug)); |
@@ -1709,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | |||
1709 | INIT_LIST_HEAD(&plug->rbio_list); | 1816 | INIT_LIST_HEAD(&plug->rbio_list); |
1710 | } | 1817 | } |
1711 | list_add_tail(&rbio->plug_list, &plug->rbio_list); | 1818 | list_add_tail(&rbio->plug_list, &plug->rbio_list); |
1819 | ret = 0; | ||
1712 | } else { | 1820 | } else { |
1713 | return __raid56_parity_write(rbio); | 1821 | ret = __raid56_parity_write(rbio); |
1822 | if (ret) | ||
1823 | btrfs_bio_counter_dec(root->fs_info); | ||
1714 | } | 1824 | } |
1715 | return 0; | 1825 | return ret; |
1716 | } | 1826 | } |
1717 | 1827 | ||
1718 | /* | 1828 | /* |
@@ -1730,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1730 | int err; | 1840 | int err; |
1731 | int i; | 1841 | int i; |
1732 | 1842 | ||
1733 | pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *), | 1843 | pointers = kzalloc(rbio->real_stripes * sizeof(void *), |
1734 | GFP_NOFS); | 1844 | GFP_NOFS); |
1735 | if (!pointers) { | 1845 | if (!pointers) { |
1736 | err = -ENOMEM; | 1846 | err = -ENOMEM; |
@@ -1740,7 +1850,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1740 | faila = rbio->faila; | 1850 | faila = rbio->faila; |
1741 | failb = rbio->failb; | 1851 | failb = rbio->failb; |
1742 | 1852 | ||
1743 | if (rbio->read_rebuild) { | 1853 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { |
1744 | spin_lock_irq(&rbio->bio_list_lock); | 1854 | spin_lock_irq(&rbio->bio_list_lock); |
1745 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); | 1855 | set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); |
1746 | spin_unlock_irq(&rbio->bio_list_lock); | 1856 | spin_unlock_irq(&rbio->bio_list_lock); |
@@ -1749,15 +1859,23 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1749 | index_rbio_pages(rbio); | 1859 | index_rbio_pages(rbio); |
1750 | 1860 | ||
1751 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { | 1861 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { |
1862 | /* | ||
1863 | * For a parity scrub we just use the bitmap to mark the | ||
1864 | * horizontal stripes in which we have data. | ||
1865 | */ | ||
1866 | if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && | ||
1867 | !test_bit(pagenr, rbio->dbitmap)) | ||
1868 | continue; | ||
1869 | |||
1752 | /* setup our array of pointers with pages | 1870 | /* setup our array of pointers with pages |
1753 | * from each stripe | 1871 | * from each stripe |
1754 | */ | 1872 | */ |
1755 | for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { | 1873 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1756 | /* | 1874 | /* |
1757 | * if we're rebuilding a read, we have to use | 1875 | * if we're rebuilding a read, we have to use |
1758 | * pages from the bio list | 1876 | * pages from the bio list |
1759 | */ | 1877 | */ |
1760 | if (rbio->read_rebuild && | 1878 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD && |
1761 | (stripe == faila || stripe == failb)) { | 1879 | (stripe == faila || stripe == failb)) { |
1762 | page = page_in_rbio(rbio, stripe, pagenr, 0); | 1880 | page = page_in_rbio(rbio, stripe, pagenr, 0); |
1763 | } else { | 1881 | } else { |
@@ -1767,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1767 | } | 1885 | } |
1768 | 1886 | ||
1769 | /* all raid6 handling here */ | 1887 | /* all raid6 handling here */ |
1770 | if (rbio->raid_map[rbio->bbio->num_stripes - 1] == | 1888 | if (rbio->raid_map[rbio->real_stripes - 1] == |
1771 | RAID6_Q_STRIPE) { | 1889 | RAID6_Q_STRIPE) { |
1772 | 1890 | ||
1773 | /* | 1891 | /* |
@@ -1817,10 +1935,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1817 | } | 1935 | } |
1818 | 1936 | ||
1819 | if (rbio->raid_map[failb] == RAID5_P_STRIPE) { | 1937 | if (rbio->raid_map[failb] == RAID5_P_STRIPE) { |
1820 | raid6_datap_recov(rbio->bbio->num_stripes, | 1938 | raid6_datap_recov(rbio->real_stripes, |
1821 | PAGE_SIZE, faila, pointers); | 1939 | PAGE_SIZE, faila, pointers); |
1822 | } else { | 1940 | } else { |
1823 | raid6_2data_recov(rbio->bbio->num_stripes, | 1941 | raid6_2data_recov(rbio->real_stripes, |
1824 | PAGE_SIZE, faila, failb, | 1942 | PAGE_SIZE, faila, failb, |
1825 | pointers); | 1943 | pointers); |
1826 | } | 1944 | } |
@@ -1850,7 +1968,7 @@ pstripe: | |||
1850 | * know they can be trusted. If this was a read reconstruction, | 1968 | * know they can be trusted. If this was a read reconstruction, |
1851 | * other endio functions will fiddle the uptodate bits | 1969 | * other endio functions will fiddle the uptodate bits |
1852 | */ | 1970 | */ |
1853 | if (!rbio->read_rebuild) { | 1971 | if (rbio->operation == BTRFS_RBIO_WRITE) { |
1854 | for (i = 0; i < nr_pages; i++) { | 1972 | for (i = 0; i < nr_pages; i++) { |
1855 | if (faila != -1) { | 1973 | if (faila != -1) { |
1856 | page = rbio_stripe_page(rbio, faila, i); | 1974 | page = rbio_stripe_page(rbio, faila, i); |
@@ -1862,12 +1980,12 @@ pstripe: | |||
1862 | } | 1980 | } |
1863 | } | 1981 | } |
1864 | } | 1982 | } |
1865 | for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { | 1983 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1866 | /* | 1984 | /* |
1867 | * if we're rebuilding a read, we have to use | 1985 | * if we're rebuilding a read, we have to use |
1868 | * pages from the bio list | 1986 | * pages from the bio list |
1869 | */ | 1987 | */ |
1870 | if (rbio->read_rebuild && | 1988 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD && |
1871 | (stripe == faila || stripe == failb)) { | 1989 | (stripe == faila || stripe == failb)) { |
1872 | page = page_in_rbio(rbio, stripe, pagenr, 0); | 1990 | page = page_in_rbio(rbio, stripe, pagenr, 0); |
1873 | } else { | 1991 | } else { |
@@ -1882,9 +2000,9 @@ cleanup: | |||
1882 | kfree(pointers); | 2000 | kfree(pointers); |
1883 | 2001 | ||
1884 | cleanup_io: | 2002 | cleanup_io: |
1885 | 2003 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { | |
1886 | if (rbio->read_rebuild) { | 2004 | if (err == 0 && |
1887 | if (err == 0) | 2005 | !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) |
1888 | cache_rbio_pages(rbio); | 2006 | cache_rbio_pages(rbio); |
1889 | else | 2007 | else |
1890 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); | 2008 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); |
@@ -1893,7 +2011,13 @@ cleanup_io: | |||
1893 | } else if (err == 0) { | 2011 | } else if (err == 0) { |
1894 | rbio->faila = -1; | 2012 | rbio->faila = -1; |
1895 | rbio->failb = -1; | 2013 | rbio->failb = -1; |
1896 | finish_rmw(rbio); | 2014 | |
2015 | if (rbio->operation == BTRFS_RBIO_WRITE) | ||
2016 | finish_rmw(rbio); | ||
2017 | else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) | ||
2018 | finish_parity_scrub(rbio, 0); | ||
2019 | else | ||
2020 | BUG(); | ||
1897 | } else { | 2021 | } else { |
1898 | rbio_orig_end_io(rbio, err, 0); | 2022 | rbio_orig_end_io(rbio, err, 0); |
1899 | } | 2023 | } |
@@ -1917,10 +2041,10 @@ static void raid_recover_end_io(struct bio *bio, int err) | |||
1917 | set_bio_pages_uptodate(bio); | 2041 | set_bio_pages_uptodate(bio); |
1918 | bio_put(bio); | 2042 | bio_put(bio); |
1919 | 2043 | ||
1920 | if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) | 2044 | if (!atomic_dec_and_test(&rbio->stripes_pending)) |
1921 | return; | 2045 | return; |
1922 | 2046 | ||
1923 | if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) | 2047 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) |
1924 | rbio_orig_end_io(rbio, -EIO, 0); | 2048 | rbio_orig_end_io(rbio, -EIO, 0); |
1925 | else | 2049 | else |
1926 | __raid_recover_end_io(rbio); | 2050 | __raid_recover_end_io(rbio); |
@@ -1937,7 +2061,6 @@ static void raid_recover_end_io(struct bio *bio, int err) | |||
1937 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | 2061 | static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) |
1938 | { | 2062 | { |
1939 | int bios_to_read = 0; | 2063 | int bios_to_read = 0; |
1940 | struct btrfs_bio *bbio = rbio->bbio; | ||
1941 | struct bio_list bio_list; | 2064 | struct bio_list bio_list; |
1942 | int ret; | 2065 | int ret; |
1943 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); | 2066 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
@@ -1951,16 +2074,16 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1951 | if (ret) | 2074 | if (ret) |
1952 | goto cleanup; | 2075 | goto cleanup; |
1953 | 2076 | ||
1954 | atomic_set(&rbio->bbio->error, 0); | 2077 | atomic_set(&rbio->error, 0); |
1955 | 2078 | ||
1956 | /* | 2079 | /* |
1957 | * read everything that hasn't failed. Thanks to the | 2080 | * read everything that hasn't failed. Thanks to the |
1958 | * stripe cache, it is possible that some or all of these | 2081 | * stripe cache, it is possible that some or all of these |
1959 | * pages are going to be uptodate. | 2082 | * pages are going to be uptodate. |
1960 | */ | 2083 | */ |
1961 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { | 2084 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { |
1962 | if (rbio->faila == stripe || rbio->failb == stripe) { | 2085 | if (rbio->faila == stripe || rbio->failb == stripe) { |
1963 | atomic_inc(&rbio->bbio->error); | 2086 | atomic_inc(&rbio->error); |
1964 | continue; | 2087 | continue; |
1965 | } | 2088 | } |
1966 | 2089 | ||
@@ -1990,7 +2113,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1990 | * were up to date, or we might have no bios to read because | 2113 | * were up to date, or we might have no bios to read because |
1991 | * the devices were gone. | 2114 | * the devices were gone. |
1992 | */ | 2115 | */ |
1993 | if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) { | 2116 | if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { |
1994 | __raid_recover_end_io(rbio); | 2117 | __raid_recover_end_io(rbio); |
1995 | goto out; | 2118 | goto out; |
1996 | } else { | 2119 | } else { |
@@ -2002,7 +2125,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
2002 | * the bbio may be freed once we submit the last bio. Make sure | 2125 | * the bbio may be freed once we submit the last bio. Make sure |
2003 | * not to touch it after that | 2126 | * not to touch it after that |
2004 | */ | 2127 | */ |
2005 | atomic_set(&bbio->stripes_pending, bios_to_read); | 2128 | atomic_set(&rbio->stripes_pending, bios_to_read); |
2006 | while (1) { | 2129 | while (1) { |
2007 | bio = bio_list_pop(&bio_list); | 2130 | bio = bio_list_pop(&bio_list); |
2008 | if (!bio) | 2131 | if (!bio) |
@@ -2021,7 +2144,7 @@ out: | |||
2021 | return 0; | 2144 | return 0; |
2022 | 2145 | ||
2023 | cleanup: | 2146 | cleanup: |
2024 | if (rbio->read_rebuild) | 2147 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD) |
2025 | rbio_orig_end_io(rbio, -EIO, 0); | 2148 | rbio_orig_end_io(rbio, -EIO, 0); |
2026 | return -EIO; | 2149 | return -EIO; |
2027 | } | 2150 | } |
@@ -2034,34 +2157,42 @@ cleanup: | |||
2034 | */ | 2157 | */ |
2035 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 2158 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
2036 | struct btrfs_bio *bbio, u64 *raid_map, | 2159 | struct btrfs_bio *bbio, u64 *raid_map, |
2037 | u64 stripe_len, int mirror_num) | 2160 | u64 stripe_len, int mirror_num, int generic_io) |
2038 | { | 2161 | { |
2039 | struct btrfs_raid_bio *rbio; | 2162 | struct btrfs_raid_bio *rbio; |
2040 | int ret; | 2163 | int ret; |
2041 | 2164 | ||
2042 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | 2165 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); |
2043 | if (IS_ERR(rbio)) | 2166 | if (IS_ERR(rbio)) { |
2167 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); | ||
2044 | return PTR_ERR(rbio); | 2168 | return PTR_ERR(rbio); |
2169 | } | ||
2045 | 2170 | ||
2046 | rbio->read_rebuild = 1; | 2171 | rbio->operation = BTRFS_RBIO_READ_REBUILD; |
2047 | bio_list_add(&rbio->bio_list, bio); | 2172 | bio_list_add(&rbio->bio_list, bio); |
2048 | rbio->bio_list_bytes = bio->bi_iter.bi_size; | 2173 | rbio->bio_list_bytes = bio->bi_iter.bi_size; |
2049 | 2174 | ||
2050 | rbio->faila = find_logical_bio_stripe(rbio, bio); | 2175 | rbio->faila = find_logical_bio_stripe(rbio, bio); |
2051 | if (rbio->faila == -1) { | 2176 | if (rbio->faila == -1) { |
2052 | BUG(); | 2177 | BUG(); |
2053 | kfree(raid_map); | 2178 | __free_bbio_and_raid_map(bbio, raid_map, generic_io); |
2054 | kfree(bbio); | ||
2055 | kfree(rbio); | 2179 | kfree(rbio); |
2056 | return -EIO; | 2180 | return -EIO; |
2057 | } | 2181 | } |
2058 | 2182 | ||
2183 | if (generic_io) { | ||
2184 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
2185 | rbio->generic_bio_cnt = 1; | ||
2186 | } else { | ||
2187 | set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); | ||
2188 | } | ||
2189 | |||
2059 | /* | 2190 | /* |
2060 | * reconstruct from the q stripe if they are | 2191 | * reconstruct from the q stripe if they are |
2061 | * asking for mirror 3 | 2192 | * asking for mirror 3 |
2062 | */ | 2193 | */ |
2063 | if (mirror_num == 3) | 2194 | if (mirror_num == 3) |
2064 | rbio->failb = bbio->num_stripes - 2; | 2195 | rbio->failb = rbio->real_stripes - 2; |
2065 | 2196 | ||
2066 | ret = lock_stripe_add(rbio); | 2197 | ret = lock_stripe_add(rbio); |
2067 | 2198 | ||
@@ -2098,3 +2229,483 @@ static void read_rebuild_work(struct btrfs_work *work) | |||
2098 | rbio = container_of(work, struct btrfs_raid_bio, work); | 2229 | rbio = container_of(work, struct btrfs_raid_bio, work); |
2099 | __raid56_parity_recover(rbio); | 2230 | __raid56_parity_recover(rbio); |
2100 | } | 2231 | } |
2232 | |||
2233 | /* | ||
2234 | * The following code is used to scrub/replace the parity stripe | ||
2235 | * | ||
2236 | * Note: We need to make sure that all the pages added to the scrub/replace | ||
2237 | * raid bio are correct and will not be changed during the scrub/replace. That | ||
2238 | * is, those pages only hold metadata or file data with checksums. | ||
2239 | */ | ||
2240 | |||
2241 | struct btrfs_raid_bio * | ||
2242 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | ||
2243 | struct btrfs_bio *bbio, u64 *raid_map, | ||
2244 | u64 stripe_len, struct btrfs_device *scrub_dev, | ||
2245 | unsigned long *dbitmap, int stripe_nsectors) | ||
2246 | { | ||
2247 | struct btrfs_raid_bio *rbio; | ||
2248 | int i; | ||
2249 | |||
2250 | rbio = alloc_rbio(root, bbio, raid_map, stripe_len); | ||
2251 | if (IS_ERR(rbio)) | ||
2252 | return NULL; | ||
2253 | bio_list_add(&rbio->bio_list, bio); | ||
2254 | /* | ||
2255 | * This is a special bio which is used to hold the completion handler | ||
2256 | * and make the scrub rbio look similar to the other rbio types | ||
2257 | */ | ||
2258 | ASSERT(!bio->bi_iter.bi_size); | ||
2259 | rbio->operation = BTRFS_RBIO_PARITY_SCRUB; | ||
2260 | |||
2261 | for (i = 0; i < rbio->real_stripes; i++) { | ||
2262 | if (bbio->stripes[i].dev == scrub_dev) { | ||
2263 | rbio->scrubp = i; | ||
2264 | break; | ||
2265 | } | ||
2266 | } | ||
2267 | |||
2268 | /* For now we only support the case where sectorsize equals page size */ | ||
2269 | ASSERT(root->sectorsize == PAGE_SIZE); | ||
2270 | ASSERT(rbio->stripe_npages == stripe_nsectors); | ||
2271 | bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); | ||
2272 | |||
2273 | return rbio; | ||
2274 | } | ||
2275 | |||
2276 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | ||
2277 | struct page *page, u64 logical) | ||
2278 | { | ||
2279 | int stripe_offset; | ||
2280 | int index; | ||
2281 | |||
2282 | ASSERT(logical >= rbio->raid_map[0]); | ||
2283 | ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + | ||
2284 | rbio->stripe_len * rbio->nr_data); | ||
2285 | stripe_offset = (int)(logical - rbio->raid_map[0]); | ||
2286 | index = stripe_offset >> PAGE_CACHE_SHIFT; | ||
2287 | rbio->bio_pages[index] = page; | ||
2288 | } | ||
2289 | |||
2290 | /* | ||
2291 | * We only scrub the parity for which we have correct data on the same | ||
2292 | * horizontal stripe, so we don't need to allocate pages for all the stripes. | ||
2293 | */ | ||
2294 | static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) | ||
2295 | { | ||
2296 | int i; | ||
2297 | int bit; | ||
2298 | int index; | ||
2299 | struct page *page; | ||
2300 | |||
2301 | for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { | ||
2302 | for (i = 0; i < rbio->real_stripes; i++) { | ||
2303 | index = i * rbio->stripe_npages + bit; | ||
2304 | if (rbio->stripe_pages[index]) | ||
2305 | continue; | ||
2306 | |||
2307 | page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2308 | if (!page) | ||
2309 | return -ENOMEM; | ||
2310 | rbio->stripe_pages[index] = page; | ||
2311 | ClearPageUptodate(page); | ||
2312 | } | ||
2313 | } | ||
2314 | return 0; | ||
2315 | } | ||
2316 | |||
2317 | /* | ||
2318 | * end io function used by finish_parity_scrub. When we finally | ||
2319 | * get here, we've written out the scrubbed parity for a full stripe | ||
2320 | */ | ||
2321 | static void raid_write_parity_end_io(struct bio *bio, int err) | ||
2322 | { | ||
2323 | struct btrfs_raid_bio *rbio = bio->bi_private; | ||
2324 | |||
2325 | if (err) | ||
2326 | fail_bio_stripe(rbio, bio); | ||
2327 | |||
2328 | bio_put(bio); | ||
2329 | |||
2330 | if (!atomic_dec_and_test(&rbio->stripes_pending)) | ||
2331 | return; | ||
2332 | |||
2333 | err = 0; | ||
2334 | |||
2335 | if (atomic_read(&rbio->error)) | ||
2336 | err = -EIO; | ||
2337 | |||
2338 | rbio_orig_end_io(rbio, err, 0); | ||
2339 | } | ||
2340 | |||
2341 | static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, | ||
2342 | int need_check) | ||
2343 | { | ||
2344 | struct btrfs_bio *bbio = rbio->bbio; | ||
2345 | void *pointers[rbio->real_stripes]; | ||
2346 | DECLARE_BITMAP(pbitmap, rbio->stripe_npages); | ||
2347 | int nr_data = rbio->nr_data; | ||
2348 | int stripe; | ||
2349 | int pagenr; | ||
2350 | int p_stripe = -1; | ||
2351 | int q_stripe = -1; | ||
2352 | struct page *p_page = NULL; | ||
2353 | struct page *q_page = NULL; | ||
2354 | struct bio_list bio_list; | ||
2355 | struct bio *bio; | ||
2356 | int is_replace = 0; | ||
2357 | int ret; | ||
2358 | |||
2359 | bio_list_init(&bio_list); | ||
2360 | |||
2361 | if (rbio->real_stripes - rbio->nr_data == 1) { | ||
2362 | p_stripe = rbio->real_stripes - 1; | ||
2363 | } else if (rbio->real_stripes - rbio->nr_data == 2) { | ||
2364 | p_stripe = rbio->real_stripes - 2; | ||
2365 | q_stripe = rbio->real_stripes - 1; | ||
2366 | } else { | ||
2367 | BUG(); | ||
2368 | } | ||
2369 | |||
2370 | if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { | ||
2371 | is_replace = 1; | ||
2372 | bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); | ||
2373 | } | ||
2374 | |||
2375 | /* | ||
2376 | * The higher layers (the scrubber) are unlikely to | ||
2377 | * use this area of the disk again soon, so don't cache | ||
2378 | * it. | ||
2379 | */ | ||
2380 | clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); | ||
2381 | |||
2382 | if (!need_check) | ||
2383 | goto writeback; | ||
2384 | |||
2385 | p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2386 | if (!p_page) | ||
2387 | goto cleanup; | ||
2388 | SetPageUptodate(p_page); | ||
2389 | |||
2390 | if (q_stripe != -1) { | ||
2391 | q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
2392 | if (!q_page) { | ||
2393 | __free_page(p_page); | ||
2394 | goto cleanup; | ||
2395 | } | ||
2396 | SetPageUptodate(q_page); | ||
2397 | } | ||
2398 | |||
2399 | atomic_set(&rbio->error, 0); | ||
2400 | |||
2401 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2402 | struct page *p; | ||
2403 | void *parity; | ||
2404 | /* first collect one page from each data stripe */ | ||
2405 | for (stripe = 0; stripe < nr_data; stripe++) { | ||
2406 | p = page_in_rbio(rbio, stripe, pagenr, 0); | ||
2407 | pointers[stripe] = kmap(p); | ||
2408 | } | ||
2409 | |||
2410 | /* then add the parity stripe */ | ||
2411 | pointers[stripe++] = kmap(p_page); | ||
2412 | |||
2413 | if (q_stripe != -1) { | ||
2414 | |||
2415 | /* | ||
2416 | * raid6, add the qstripe and call the | ||
2417 | * library function to fill in our p/q | ||
2418 | */ | ||
2419 | pointers[stripe++] = kmap(q_page); | ||
2420 | |||
2421 | raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, | ||
2422 | pointers); | ||
2423 | } else { | ||
2424 | /* raid5 */ | ||
2425 | memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); | ||
2426 | run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); | ||
2427 | } | ||
2428 | |||
2429 | /* Check the scrubbed parity and repair it */ | ||
2430 | p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2431 | parity = kmap(p); | ||
2432 | if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE)) | ||
2433 | memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE); | ||
2434 | else | ||
2435 | /* Parity is right, no need to write it back */ | ||
2436 | bitmap_clear(rbio->dbitmap, pagenr, 1); | ||
2437 | kunmap(p); | ||
2438 | |||
2439 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) | ||
2440 | kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); | ||
2441 | } | ||
2442 | |||
2443 | __free_page(p_page); | ||
2444 | if (q_page) | ||
2445 | __free_page(q_page); | ||
2446 | |||
2447 | writeback: | ||
2448 | /* | ||
2449 | * time to start writing. Make bios for the scrubbed parity | ||
2450 | * pages that turned out to need repair, and ignore | ||
2451 | * everything else. | ||
2452 | */ | ||
2453 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2454 | struct page *page; | ||
2455 | |||
2456 | page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2457 | ret = rbio_add_io_page(rbio, &bio_list, | ||
2458 | page, rbio->scrubp, pagenr, rbio->stripe_len); | ||
2459 | if (ret) | ||
2460 | goto cleanup; | ||
2461 | } | ||
2462 | |||
2463 | if (!is_replace) | ||
2464 | goto submit_write; | ||
2465 | |||
2466 | for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { | ||
2467 | struct page *page; | ||
2468 | |||
2469 | page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); | ||
2470 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
2471 | bbio->tgtdev_map[rbio->scrubp], | ||
2472 | pagenr, rbio->stripe_len); | ||
2473 | if (ret) | ||
2474 | goto cleanup; | ||
2475 | } | ||
2476 | |||
2477 | submit_write: | ||
2478 | nr_data = bio_list_size(&bio_list); | ||
2479 | if (!nr_data) { | ||
2480 | /* All the parity is correct */ | ||
2481 | rbio_orig_end_io(rbio, 0, 0); | ||
2482 | return; | ||
2483 | } | ||
2484 | |||
2485 | atomic_set(&rbio->stripes_pending, nr_data); | ||
2486 | |||
2487 | while (1) { | ||
2488 | bio = bio_list_pop(&bio_list); | ||
2489 | if (!bio) | ||
2490 | break; | ||
2491 | |||
2492 | bio->bi_private = rbio; | ||
2493 | bio->bi_end_io = raid_write_parity_end_io; | ||
2494 | BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); | ||
2495 | submit_bio(WRITE, bio); | ||
2496 | } | ||
2497 | return; | ||
2498 | |||
2499 | cleanup: | ||
2500 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2501 | } | ||
2502 | |||
2503 | static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) | ||
2504 | { | ||
2505 | if (stripe >= 0 && stripe < rbio->nr_data) | ||
2506 | return 1; | ||
2507 | return 0; | ||
2508 | } | ||
2509 | |||
2510 | /* | ||
2511 | * While we're doing the parity check and repair, we could have errors | ||
2512 | * in reading pages off the disk. This checks for errors and if we're | ||
2513 | * not able to read the page it'll trigger parity reconstruction. The | ||
2514 | * parity scrub will be finished after we've reconstructed the failed | ||
2515 | * stripes | ||
2516 | */ | ||
2517 | static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) | ||
2518 | { | ||
2519 | if (atomic_read(&rbio->error) > rbio->bbio->max_errors) | ||
2520 | goto cleanup; | ||
2521 | |||
2522 | if (rbio->faila >= 0 || rbio->failb >= 0) { | ||
2523 | int dfail = 0, failp = -1; | ||
2524 | |||
2525 | if (is_data_stripe(rbio, rbio->faila)) | ||
2526 | dfail++; | ||
2527 | else if (is_parity_stripe(rbio->faila)) | ||
2528 | failp = rbio->faila; | ||
2529 | |||
2530 | if (is_data_stripe(rbio, rbio->failb)) | ||
2531 | dfail++; | ||
2532 | else if (is_parity_stripe(rbio->failb)) | ||
2533 | failp = rbio->failb; | ||
2534 | |||
2535 | /* | ||
2536 | * Because we cannot use the parity that is being scrubbed to | ||
2537 | * repair the data, our repair capability is reduced. | ||
2538 | * (In the case of RAID5, we cannot repair anything.) | ||
2539 | */ | ||
2540 | if (dfail > rbio->bbio->max_errors - 1) | ||
2541 | goto cleanup; | ||
2542 | |||
2543 | /* | ||
2544 | * If all the data is good and only the parity is bad, just | ||
2545 | * repair the parity. | ||
2546 | */ | ||
2547 | if (dfail == 0) { | ||
2548 | finish_parity_scrub(rbio, 0); | ||
2549 | return; | ||
2550 | } | ||
2551 | |||
2552 | /* | ||
2553 | * Here we have one corrupted data stripe and one corrupted | ||
2554 | * parity on RAID6. If the corrupted parity is the one being | ||
2555 | * scrubbed, we can luckily use the other parity to repair the | ||
2556 | * data; otherwise we cannot repair the data stripe. | ||
2557 | */ | ||
2558 | if (failp != rbio->scrubp) | ||
2559 | goto cleanup; | ||
2560 | |||
2561 | __raid_recover_end_io(rbio); | ||
2562 | } else { | ||
2563 | finish_parity_scrub(rbio, 1); | ||
2564 | } | ||
2565 | return; | ||
2566 | |||
2567 | cleanup: | ||
2568 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2569 | } | ||
2570 | |||
2571 | /* | ||
2572 | * end io for the read phase of the scrub rmw cycle. All the bios here are physical | ||
2573 | * stripe bios we've read from the disk so we can recalculate the parity of the | ||
2574 | * stripe. | ||
2575 | * | ||
2576 | * This will usually kick off finish_parity_scrub once all the bios are read in, but it | ||
2577 | * may trigger parity reconstruction if we had any errors along the way | ||
2578 | */ | ||
2579 | static void raid56_parity_scrub_end_io(struct bio *bio, int err) | ||
2580 | { | ||
2581 | struct btrfs_raid_bio *rbio = bio->bi_private; | ||
2582 | |||
2583 | if (err) | ||
2584 | fail_bio_stripe(rbio, bio); | ||
2585 | else | ||
2586 | set_bio_pages_uptodate(bio); | ||
2587 | |||
2588 | bio_put(bio); | ||
2589 | |||
2590 | if (!atomic_dec_and_test(&rbio->stripes_pending)) | ||
2591 | return; | ||
2592 | |||
2593 | /* | ||
2594 | * this will normally call finish_parity_scrub to start our write | ||
2595 | * but if there are any failed stripes we'll reconstruct | ||
2596 | * from parity first | ||
2597 | */ | ||
2598 | validate_rbio_for_parity_scrub(rbio); | ||
2599 | } | ||
2600 | |||
2601 | static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) | ||
2602 | { | ||
2603 | int bios_to_read = 0; | ||
2604 | struct bio_list bio_list; | ||
2605 | int ret; | ||
2606 | int pagenr; | ||
2607 | int stripe; | ||
2608 | struct bio *bio; | ||
2609 | |||
2610 | ret = alloc_rbio_essential_pages(rbio); | ||
2611 | if (ret) | ||
2612 | goto cleanup; | ||
2613 | |||
2614 | bio_list_init(&bio_list); | ||
2615 | |||
2616 | atomic_set(&rbio->error, 0); | ||
2617 | /* | ||
2618 | * build a list of bios to read all the missing parts of this | ||
2619 | * stripe | ||
2620 | */ | ||
2621 | for (stripe = 0; stripe < rbio->real_stripes; stripe++) { | ||
2622 | for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { | ||
2623 | struct page *page; | ||
2624 | /* | ||
2625 | * we want to find all the pages missing from | ||
2626 | * the rbio and read them from the disk. If | ||
2627 | * page_in_rbio finds a page in the bio list | ||
2628 | * we don't need to read it off the stripe. | ||
2629 | */ | ||
2630 | page = page_in_rbio(rbio, stripe, pagenr, 1); | ||
2631 | if (page) | ||
2632 | continue; | ||
2633 | |||
2634 | page = rbio_stripe_page(rbio, stripe, pagenr); | ||
2635 | /* | ||
2636 | * the bio cache may have handed us an uptodate | ||
2637 | * page. If so, be happy and use it | ||
2638 | */ | ||
2639 | if (PageUptodate(page)) | ||
2640 | continue; | ||
2641 | |||
2642 | ret = rbio_add_io_page(rbio, &bio_list, page, | ||
2643 | stripe, pagenr, rbio->stripe_len); | ||
2644 | if (ret) | ||
2645 | goto cleanup; | ||
2646 | } | ||
2647 | } | ||
2648 | |||
2649 | bios_to_read = bio_list_size(&bio_list); | ||
2650 | if (!bios_to_read) { | ||
2651 | /* | ||
2652 | * this can happen if others have merged with | ||
2653 | * us, it means there is nothing left to read. | ||
2654 | * But if there are missing devices it may not be | ||
2655 | * safe to do the full stripe write yet. | ||
2656 | */ | ||
2657 | goto finish; | ||
2658 | } | ||
2659 | |||
2660 | /* | ||
2661 | * the bbio may be freed once we submit the last bio. Make sure | ||
2662 | * not to touch it after that | ||
2663 | */ | ||
2664 | atomic_set(&rbio->stripes_pending, bios_to_read); | ||
2665 | while (1) { | ||
2666 | bio = bio_list_pop(&bio_list); | ||
2667 | if (!bio) | ||
2668 | break; | ||
2669 | |||
2670 | bio->bi_private = rbio; | ||
2671 | bio->bi_end_io = raid56_parity_scrub_end_io; | ||
2672 | |||
2673 | btrfs_bio_wq_end_io(rbio->fs_info, bio, | ||
2674 | BTRFS_WQ_ENDIO_RAID56); | ||
2675 | |||
2676 | BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); | ||
2677 | submit_bio(READ, bio); | ||
2678 | } | ||
2679 | /* the actual write will happen once the reads are done */ | ||
2680 | return; | ||
2681 | |||
2682 | cleanup: | ||
2683 | rbio_orig_end_io(rbio, -EIO, 0); | ||
2684 | return; | ||
2685 | |||
2686 | finish: | ||
2687 | validate_rbio_for_parity_scrub(rbio); | ||
2688 | } | ||
2689 | |||
2690 | static void scrub_parity_work(struct btrfs_work *work) | ||
2691 | { | ||
2692 | struct btrfs_raid_bio *rbio; | ||
2693 | |||
2694 | rbio = container_of(work, struct btrfs_raid_bio, work); | ||
2695 | raid56_parity_scrub_stripe(rbio); | ||
2696 | } | ||
2697 | |||
2698 | static void async_scrub_parity(struct btrfs_raid_bio *rbio) | ||
2699 | { | ||
2700 | btrfs_init_work(&rbio->work, btrfs_rmw_helper, | ||
2701 | scrub_parity_work, NULL, NULL); | ||
2702 | |||
2703 | btrfs_queue_work(rbio->fs_info->rmw_workers, | ||
2704 | &rbio->work); | ||
2705 | } | ||
2706 | |||
2707 | void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) | ||
2708 | { | ||
2709 | if (!lock_stripe_add(rbio)) | ||
2710 | async_scrub_parity(rbio); | ||
2711 | } | ||
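
The heart of finish_parity_scrub() above is: rebuild the parity from the data stripes, compare it with the parity currently on disk, and only queue a write (leaving the dbitmap bit set) for pages where the two differ. A minimal userspace sketch of that compare-and-repair step for the RAID5 case is given below; the page size, stripe count and buffers are invented for illustration and none of the btrfs types or helpers are used. For RAID6 the same comparison happens after raid6_call.gen_syndrome() has produced both P and Q.

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 4096
#define NR_DATA 3               /* data stripes in the horizontal stripe */

/* Recompute RAID5 parity for one page and compare it with what is on
 * disk.  Returns 1 if the on-disk parity had to be repaired. */
static int scrub_parity_page(unsigned char data[NR_DATA][PAGE_SZ],
                             unsigned char on_disk_parity[PAGE_SZ])
{
        unsigned char computed[PAGE_SZ];
        int i, j;

        /* parity = XOR of all data stripes (what run_xor() does) */
        memcpy(computed, data[0], PAGE_SZ);
        for (i = 1; i < NR_DATA; i++)
                for (j = 0; j < PAGE_SZ; j++)
                        computed[j] ^= data[i][j];

        if (memcmp(computed, on_disk_parity, PAGE_SZ) == 0)
                return 0;       /* parity is right, nothing to write back */

        /* parity mismatch: repair it (the dbitmap bit would stay set) */
        memcpy(on_disk_parity, computed, PAGE_SZ);
        return 1;
}

int main(void)
{
        unsigned char data[NR_DATA][PAGE_SZ], parity[PAGE_SZ];
        int i;

        for (i = 0; i < NR_DATA; i++)
                memset(data[i], i + 1, PAGE_SZ);
        memset(parity, 0xff, PAGE_SZ);  /* deliberately wrong parity */

        printf("repaired: %d\n", scrub_parity_page(data, parity));
        printf("repaired: %d\n", scrub_parity_page(data, parity));
        return 0;
}
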
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index ea5d73bfdfbe..31d4a157b5e3 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h | |||
@@ -39,13 +39,25 @@ static inline int nr_data_stripes(struct map_lookup *map) | |||
39 | #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ | 39 | #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ |
40 | ((x) == RAID6_Q_STRIPE)) | 40 | ((x) == RAID6_Q_STRIPE)) |
41 | 41 | ||
42 | struct btrfs_raid_bio; | ||
43 | struct btrfs_device; | ||
44 | |||
42 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, | 45 | int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, |
43 | struct btrfs_bio *bbio, u64 *raid_map, | 46 | struct btrfs_bio *bbio, u64 *raid_map, |
44 | u64 stripe_len, int mirror_num); | 47 | u64 stripe_len, int mirror_num, int generic_io); |
45 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, | 48 | int raid56_parity_write(struct btrfs_root *root, struct bio *bio, |
46 | struct btrfs_bio *bbio, u64 *raid_map, | 49 | struct btrfs_bio *bbio, u64 *raid_map, |
47 | u64 stripe_len); | 50 | u64 stripe_len); |
48 | 51 | ||
52 | struct btrfs_raid_bio * | ||
53 | raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, | ||
54 | struct btrfs_bio *bbio, u64 *raid_map, | ||
55 | u64 stripe_len, struct btrfs_device *scrub_dev, | ||
56 | unsigned long *dbitmap, int stripe_nsectors); | ||
57 | void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, | ||
58 | struct page *page, u64 logical); | ||
59 | void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); | ||
60 | |||
49 | int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); | 61 | int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); |
50 | void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); | 62 | void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); |
51 | #endif | 63 | #endif |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efa083113827..f2bb13a23f86 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -63,10 +63,18 @@ struct scrub_ctx; | |||
63 | */ | 63 | */ |
64 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ | 64 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ |
65 | 65 | ||
66 | struct scrub_recover { | ||
67 | atomic_t refs; | ||
68 | struct btrfs_bio *bbio; | ||
69 | u64 *raid_map; | ||
70 | u64 map_length; | ||
71 | }; | ||
72 | |||
66 | struct scrub_page { | 73 | struct scrub_page { |
67 | struct scrub_block *sblock; | 74 | struct scrub_block *sblock; |
68 | struct page *page; | 75 | struct page *page; |
69 | struct btrfs_device *dev; | 76 | struct btrfs_device *dev; |
77 | struct list_head list; | ||
70 | u64 flags; /* extent flags */ | 78 | u64 flags; /* extent flags */ |
71 | u64 generation; | 79 | u64 generation; |
72 | u64 logical; | 80 | u64 logical; |
@@ -79,6 +87,8 @@ struct scrub_page { | |||
79 | unsigned int io_error:1; | 87 | unsigned int io_error:1; |
80 | }; | 88 | }; |
81 | u8 csum[BTRFS_CSUM_SIZE]; | 89 | u8 csum[BTRFS_CSUM_SIZE]; |
90 | |||
91 | struct scrub_recover *recover; | ||
82 | }; | 92 | }; |
83 | 93 | ||
84 | struct scrub_bio { | 94 | struct scrub_bio { |
@@ -105,14 +115,52 @@ struct scrub_block { | |||
105 | atomic_t outstanding_pages; | 115 | atomic_t outstanding_pages; |
106 | atomic_t ref_count; /* free mem on transition to zero */ | 116 | atomic_t ref_count; /* free mem on transition to zero */ |
107 | struct scrub_ctx *sctx; | 117 | struct scrub_ctx *sctx; |
118 | struct scrub_parity *sparity; | ||
108 | struct { | 119 | struct { |
109 | unsigned int header_error:1; | 120 | unsigned int header_error:1; |
110 | unsigned int checksum_error:1; | 121 | unsigned int checksum_error:1; |
111 | unsigned int no_io_error_seen:1; | 122 | unsigned int no_io_error_seen:1; |
112 | unsigned int generation_error:1; /* also sets header_error */ | 123 | unsigned int generation_error:1; /* also sets header_error */ |
124 | |||
125 | /* The following is for the data used to check parity */ | ||
126 | /* It is for the data with checksum */ | ||
127 | unsigned int data_corrected:1; | ||
113 | }; | 128 | }; |
114 | }; | 129 | }; |
115 | 130 | ||
131 | /* Used for the chunks with parity stripes, such as RAID5/6 */ | ||
132 | struct scrub_parity { | ||
133 | struct scrub_ctx *sctx; | ||
134 | |||
135 | struct btrfs_device *scrub_dev; | ||
136 | |||
137 | u64 logic_start; | ||
138 | |||
139 | u64 logic_end; | ||
140 | |||
141 | int nsectors; | ||
142 | |||
143 | int stripe_len; | ||
144 | |||
145 | atomic_t ref_count; | ||
146 | |||
147 | struct list_head spages; | ||
148 | |||
149 | /* Work of parity check and repair */ | ||
150 | struct btrfs_work work; | ||
151 | |||
152 | /* Mark the parity blocks which have data */ | ||
153 | unsigned long *dbitmap; | ||
154 | |||
155 | /* | ||
156 | * Mark the parity blocks which have data, but for which errors | ||
157 | * happened while reading or checking the data | ||
158 | */ | ||
159 | unsigned long *ebitmap; | ||
160 | |||
161 | unsigned long bitmap[0]; | ||
162 | }; | ||
163 | |||
116 | struct scrub_wr_ctx { | 164 | struct scrub_wr_ctx { |
117 | struct scrub_bio *wr_curr_bio; | 165 | struct scrub_bio *wr_curr_bio; |
118 | struct btrfs_device *tgtdev; | 166 | struct btrfs_device *tgtdev; |
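
struct scrub_parity above ends in a zero-length bitmap[0] array: scrub_raid56_parity() further down allocates sizeof(struct scrub_parity) + 2 * bitmap_len in one chunk, then points dbitmap at the start of that area and ebitmap bitmap_len bytes into it. A small userspace sketch of the same allocation trick, using a standard C flexible array member and invented names, is shown here.

#include <stdio.h>
#include <stdlib.h>

/* Cut-down stand-in for struct scrub_parity: two bitmaps share one
 * trailing allocation, as scrub_raid56_parity() sets it up. */
struct parity_bitmaps {
        int nsectors;
        unsigned long *dbitmap;         /* sectors that carry data        */
        unsigned long *ebitmap;         /* sectors with read/check errors */
        unsigned long bitmap[];         /* 2 * bitmap_len bytes follow    */
};

/* Same rounding as scrub_calc_parity_bitmap_len(): whole longs. */
static size_t bitmap_len(int nsectors)
{
        int bits_per_long = 8 * sizeof(unsigned long);

        return (nsectors + bits_per_long - 1) / bits_per_long *
               sizeof(unsigned long);
}

static struct parity_bitmaps *alloc_parity(int nsectors)
{
        size_t len = bitmap_len(nsectors);
        struct parity_bitmaps *p;

        p = calloc(1, sizeof(*p) + 2 * len);
        if (!p)
                return NULL;
        p->nsectors = nsectors;
        p->dbitmap = p->bitmap;
        /* the kernel does this with void-pointer arithmetic */
        p->ebitmap = (unsigned long *)((char *)p->bitmap + len);
        return p;
}

int main(void)
{
        struct parity_bitmaps *p = alloc_parity(16);

        if (!p)
                return 1;
        p->dbitmap[0] |= 1ul << 3;      /* sector 3 holds checksummed data */
        p->ebitmap[0] |= 1ul << 3;      /* ...but reading it failed        */
        printf("len per bitmap: %zu bytes\n", bitmap_len(p->nsectors));
        free(p);
        return 0;
}
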
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | |||
196 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 244 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
197 | struct scrub_block *sblock, int is_metadata, | 245 | struct scrub_block *sblock, int is_metadata, |
198 | int have_csum, u8 *csum, u64 generation, | 246 | int have_csum, u8 *csum, u64 generation, |
199 | u16 csum_size); | 247 | u16 csum_size, int retry_failed_mirror); |
200 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 248 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
201 | struct scrub_block *sblock, | 249 | struct scrub_block *sblock, |
202 | int is_metadata, int have_csum, | 250 | int is_metadata, int have_csum, |
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock); | |||
218 | static void scrub_block_put(struct scrub_block *sblock); | 266 | static void scrub_block_put(struct scrub_block *sblock); |
219 | static void scrub_page_get(struct scrub_page *spage); | 267 | static void scrub_page_get(struct scrub_page *spage); |
220 | static void scrub_page_put(struct scrub_page *spage); | 268 | static void scrub_page_put(struct scrub_page *spage); |
269 | static void scrub_parity_get(struct scrub_parity *sparity); | ||
270 | static void scrub_parity_put(struct scrub_parity *sparity); | ||
221 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, | 271 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
222 | struct scrub_page *spage); | 272 | struct scrub_page *spage); |
223 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 273 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
@@ -790,6 +840,20 @@ out: | |||
790 | scrub_pending_trans_workers_dec(sctx); | 840 | scrub_pending_trans_workers_dec(sctx); |
791 | } | 841 | } |
792 | 842 | ||
843 | static inline void scrub_get_recover(struct scrub_recover *recover) | ||
844 | { | ||
845 | atomic_inc(&recover->refs); | ||
846 | } | ||
847 | |||
848 | static inline void scrub_put_recover(struct scrub_recover *recover) | ||
849 | { | ||
850 | if (atomic_dec_and_test(&recover->refs)) { | ||
851 | kfree(recover->bbio); | ||
852 | kfree(recover->raid_map); | ||
853 | kfree(recover); | ||
854 | } | ||
855 | } | ||
856 | |||
793 | /* | 857 | /* |
794 | * scrub_handle_errored_block gets called when either verification of the | 858 | * scrub_handle_errored_block gets called when either verification of the |
795 | * pages failed or the bio failed to read, e.g. with EIO. In the latter | 859 | * pages failed or the bio failed to read, e.g. with EIO. In the latter |
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
906 | 970 | ||
907 | /* build and submit the bios for the failed mirror, check checksums */ | 971 | /* build and submit the bios for the failed mirror, check checksums */ |
908 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, | 972 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, |
909 | csum, generation, sctx->csum_size); | 973 | csum, generation, sctx->csum_size, 1); |
910 | 974 | ||
911 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && | 975 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && |
912 | sblock_bad->no_io_error_seen) { | 976 | sblock_bad->no_io_error_seen) { |
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
920 | */ | 984 | */ |
921 | spin_lock(&sctx->stat_lock); | 985 | spin_lock(&sctx->stat_lock); |
922 | sctx->stat.unverified_errors++; | 986 | sctx->stat.unverified_errors++; |
987 | sblock_to_check->data_corrected = 1; | ||
923 | spin_unlock(&sctx->stat_lock); | 988 | spin_unlock(&sctx->stat_lock); |
924 | 989 | ||
925 | if (sctx->is_dev_replace) | 990 | if (sctx->is_dev_replace) |
@@ -1019,7 +1084,7 @@ nodatasum_case: | |||
1019 | /* build and submit the bios, check checksums */ | 1084 | /* build and submit the bios, check checksums */ |
1020 | scrub_recheck_block(fs_info, sblock_other, is_metadata, | 1085 | scrub_recheck_block(fs_info, sblock_other, is_metadata, |
1021 | have_csum, csum, generation, | 1086 | have_csum, csum, generation, |
1022 | sctx->csum_size); | 1087 | sctx->csum_size, 0); |
1023 | 1088 | ||
1024 | if (!sblock_other->header_error && | 1089 | if (!sblock_other->header_error && |
1025 | !sblock_other->checksum_error && | 1090 | !sblock_other->checksum_error && |
@@ -1169,7 +1234,7 @@ nodatasum_case: | |||
1169 | */ | 1234 | */ |
1170 | scrub_recheck_block(fs_info, sblock_bad, | 1235 | scrub_recheck_block(fs_info, sblock_bad, |
1171 | is_metadata, have_csum, csum, | 1236 | is_metadata, have_csum, csum, |
1172 | generation, sctx->csum_size); | 1237 | generation, sctx->csum_size, 1); |
1173 | if (!sblock_bad->header_error && | 1238 | if (!sblock_bad->header_error && |
1174 | !sblock_bad->checksum_error && | 1239 | !sblock_bad->checksum_error && |
1175 | sblock_bad->no_io_error_seen) | 1240 | sblock_bad->no_io_error_seen) |
@@ -1180,6 +1245,7 @@ nodatasum_case: | |||
1180 | corrected_error: | 1245 | corrected_error: |
1181 | spin_lock(&sctx->stat_lock); | 1246 | spin_lock(&sctx->stat_lock); |
1182 | sctx->stat.corrected_errors++; | 1247 | sctx->stat.corrected_errors++; |
1248 | sblock_to_check->data_corrected = 1; | ||
1183 | spin_unlock(&sctx->stat_lock); | 1249 | spin_unlock(&sctx->stat_lock); |
1184 | printk_ratelimited_in_rcu(KERN_ERR | 1250 | printk_ratelimited_in_rcu(KERN_ERR |
1185 | "BTRFS: fixed up error at logical %llu on dev %s\n", | 1251 | "BTRFS: fixed up error at logical %llu on dev %s\n", |
@@ -1201,11 +1267,18 @@ out: | |||
1201 | mirror_index++) { | 1267 | mirror_index++) { |
1202 | struct scrub_block *sblock = sblocks_for_recheck + | 1268 | struct scrub_block *sblock = sblocks_for_recheck + |
1203 | mirror_index; | 1269 | mirror_index; |
1270 | struct scrub_recover *recover; | ||
1204 | int page_index; | 1271 | int page_index; |
1205 | 1272 | ||
1206 | for (page_index = 0; page_index < sblock->page_count; | 1273 | for (page_index = 0; page_index < sblock->page_count; |
1207 | page_index++) { | 1274 | page_index++) { |
1208 | sblock->pagev[page_index]->sblock = NULL; | 1275 | sblock->pagev[page_index]->sblock = NULL; |
1276 | recover = sblock->pagev[page_index]->recover; | ||
1277 | if (recover) { | ||
1278 | scrub_put_recover(recover); | ||
1279 | sblock->pagev[page_index]->recover = | ||
1280 | NULL; | ||
1281 | } | ||
1209 | scrub_page_put(sblock->pagev[page_index]); | 1282 | scrub_page_put(sblock->pagev[page_index]); |
1210 | } | 1283 | } |
1211 | } | 1284 | } |
@@ -1215,14 +1288,63 @@ out: | |||
1215 | return 0; | 1288 | return 0; |
1216 | } | 1289 | } |
1217 | 1290 | ||
1291 | static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) | ||
1292 | { | ||
1293 | if (raid_map) { | ||
1294 | if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) | ||
1295 | return 3; | ||
1296 | else | ||
1297 | return 2; | ||
1298 | } else { | ||
1299 | return (int)bbio->num_stripes; | ||
1300 | } | ||
1301 | } | ||
1302 | |||
1303 | static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, | ||
1304 | u64 mapped_length, | ||
1305 | int nstripes, int mirror, | ||
1306 | int *stripe_index, | ||
1307 | u64 *stripe_offset) | ||
1308 | { | ||
1309 | int i; | ||
1310 | |||
1311 | if (raid_map) { | ||
1312 | /* RAID5/6 */ | ||
1313 | for (i = 0; i < nstripes; i++) { | ||
1314 | if (raid_map[i] == RAID6_Q_STRIPE || | ||
1315 | raid_map[i] == RAID5_P_STRIPE) | ||
1316 | continue; | ||
1317 | |||
1318 | if (logical >= raid_map[i] && | ||
1319 | logical < raid_map[i] + mapped_length) | ||
1320 | break; | ||
1321 | } | ||
1322 | |||
1323 | *stripe_index = i; | ||
1324 | *stripe_offset = logical - raid_map[i]; | ||
1325 | } else { | ||
1326 | /* The other RAID types */ | ||
1327 | *stripe_index = mirror; | ||
1328 | *stripe_offset = 0; | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1218 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 1332 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
1219 | struct btrfs_fs_info *fs_info, | 1333 | struct btrfs_fs_info *fs_info, |
1220 | struct scrub_block *original_sblock, | 1334 | struct scrub_block *original_sblock, |
1221 | u64 length, u64 logical, | 1335 | u64 length, u64 logical, |
1222 | struct scrub_block *sblocks_for_recheck) | 1336 | struct scrub_block *sblocks_for_recheck) |
1223 | { | 1337 | { |
1338 | struct scrub_recover *recover; | ||
1339 | struct btrfs_bio *bbio; | ||
1340 | u64 *raid_map; | ||
1341 | u64 sublen; | ||
1342 | u64 mapped_length; | ||
1343 | u64 stripe_offset; | ||
1344 | int stripe_index; | ||
1224 | int page_index; | 1345 | int page_index; |
1225 | int mirror_index; | 1346 | int mirror_index; |
1347 | int nmirrors; | ||
1226 | int ret; | 1348 | int ret; |
1227 | 1349 | ||
1228 | /* | 1350 | /* |
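
scrub_nr_raid_mirrors() and scrub_stripe_index_and_offset() added above decide how a recheck page is re-read: RAID5 stripes report two "mirrors" (the data itself and a rebuild via P), RAID6 three (data, rebuild via P, rebuild via Q), and a logical address is mapped to the data stripe whose range in raid_map contains it. A standalone sketch of that RAID5/6 lookup follows; the raid_map contents are invented, and the sentinel values mirror the ((u64)-2)/((u64)-1) convention the rest of the patch relies on.

#include <stdio.h>
#include <stdint.h>

#define RAID5_P_STRIPE ((uint64_t)-2)   /* sentinel: P parity stripe */
#define RAID6_Q_STRIPE ((uint64_t)-1)   /* sentinel: Q parity stripe */

/* Find which data stripe a logical address falls into and the offset
 * inside that stripe (the RAID5/6 branch of the kernel helper).  As in
 * the kernel, the caller guarantees the address is covered by one of
 * the data stripes. */
static void stripe_index_and_offset(uint64_t logical, const uint64_t *raid_map,
                                    uint64_t mapped_length, int nstripes,
                                    int *stripe_index, uint64_t *stripe_offset)
{
        int i;

        for (i = 0; i < nstripes; i++) {
                if (raid_map[i] == RAID5_P_STRIPE ||
                    raid_map[i] == RAID6_Q_STRIPE)
                        continue;       /* parity stripes hold no logical data */
                if (logical >= raid_map[i] &&
                    logical < raid_map[i] + mapped_length)
                        break;
        }
        *stripe_index = i;
        *stripe_offset = logical - raid_map[i];
}

int main(void)
{
        /* two 64K data stripes at logical 1M and 1M+64K, then P and Q */
        uint64_t raid_map[4] = { 1048576, 1114112,
                                 RAID5_P_STRIPE, RAID6_Q_STRIPE };
        int idx;
        uint64_t off;

        stripe_index_and_offset(1118208, raid_map, 65536, 4, &idx, &off);
        printf("stripe %d, offset %llu\n", idx,
               (unsigned long long)off);       /* stripe 1, offset 4096 */
        return 0;
}
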
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | |||
1233 | 1355 | ||
1234 | page_index = 0; | 1356 | page_index = 0; |
1235 | while (length > 0) { | 1357 | while (length > 0) { |
1236 | u64 sublen = min_t(u64, length, PAGE_SIZE); | 1358 | sublen = min_t(u64, length, PAGE_SIZE); |
1237 | u64 mapped_length = sublen; | 1359 | mapped_length = sublen; |
1238 | struct btrfs_bio *bbio = NULL; | 1360 | bbio = NULL; |
1361 | raid_map = NULL; | ||
1239 | 1362 | ||
1240 | /* | 1363 | /* |
1241 | * with a length of PAGE_SIZE, each returned stripe | 1364 | * with a length of PAGE_SIZE, each returned stripe |
1242 | * represents one mirror | 1365 | * represents one mirror |
1243 | */ | 1366 | */ |
1244 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, | 1367 | ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, |
1245 | &mapped_length, &bbio, 0); | 1368 | &mapped_length, &bbio, 0, &raid_map); |
1246 | if (ret || !bbio || mapped_length < sublen) { | 1369 | if (ret || !bbio || mapped_length < sublen) { |
1247 | kfree(bbio); | 1370 | kfree(bbio); |
1371 | kfree(raid_map); | ||
1248 | return -EIO; | 1372 | return -EIO; |
1249 | } | 1373 | } |
1250 | 1374 | ||
1375 | recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); | ||
1376 | if (!recover) { | ||
1377 | kfree(bbio); | ||
1378 | kfree(raid_map); | ||
1379 | return -ENOMEM; | ||
1380 | } | ||
1381 | |||
1382 | atomic_set(&recover->refs, 1); | ||
1383 | recover->bbio = bbio; | ||
1384 | recover->raid_map = raid_map; | ||
1385 | recover->map_length = mapped_length; | ||
1386 | |||
1251 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); | 1387 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
1252 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; | 1388 | |
1389 | nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); | ||
1390 | for (mirror_index = 0; mirror_index < nmirrors; | ||
1253 | mirror_index++) { | 1391 | mirror_index++) { |
1254 | struct scrub_block *sblock; | 1392 | struct scrub_block *sblock; |
1255 | struct scrub_page *page; | 1393 | struct scrub_page *page; |
@@ -1265,26 +1403,38 @@ leave_nomem: | |||
1265 | spin_lock(&sctx->stat_lock); | 1403 | spin_lock(&sctx->stat_lock); |
1266 | sctx->stat.malloc_errors++; | 1404 | sctx->stat.malloc_errors++; |
1267 | spin_unlock(&sctx->stat_lock); | 1405 | spin_unlock(&sctx->stat_lock); |
1268 | kfree(bbio); | 1406 | scrub_put_recover(recover); |
1269 | return -ENOMEM; | 1407 | return -ENOMEM; |
1270 | } | 1408 | } |
1271 | scrub_page_get(page); | 1409 | scrub_page_get(page); |
1272 | sblock->pagev[page_index] = page; | 1410 | sblock->pagev[page_index] = page; |
1273 | page->logical = logical; | 1411 | page->logical = logical; |
1274 | page->physical = bbio->stripes[mirror_index].physical; | 1412 | |
1413 | scrub_stripe_index_and_offset(logical, raid_map, | ||
1414 | mapped_length, | ||
1415 | bbio->num_stripes, | ||
1416 | mirror_index, | ||
1417 | &stripe_index, | ||
1418 | &stripe_offset); | ||
1419 | page->physical = bbio->stripes[stripe_index].physical + | ||
1420 | stripe_offset; | ||
1421 | page->dev = bbio->stripes[stripe_index].dev; | ||
1422 | |||
1275 | BUG_ON(page_index >= original_sblock->page_count); | 1423 | BUG_ON(page_index >= original_sblock->page_count); |
1276 | page->physical_for_dev_replace = | 1424 | page->physical_for_dev_replace = |
1277 | original_sblock->pagev[page_index]-> | 1425 | original_sblock->pagev[page_index]-> |
1278 | physical_for_dev_replace; | 1426 | physical_for_dev_replace; |
1279 | /* for missing devices, dev->bdev is NULL */ | 1427 | /* for missing devices, dev->bdev is NULL */ |
1280 | page->dev = bbio->stripes[mirror_index].dev; | ||
1281 | page->mirror_num = mirror_index + 1; | 1428 | page->mirror_num = mirror_index + 1; |
1282 | sblock->page_count++; | 1429 | sblock->page_count++; |
1283 | page->page = alloc_page(GFP_NOFS); | 1430 | page->page = alloc_page(GFP_NOFS); |
1284 | if (!page->page) | 1431 | if (!page->page) |
1285 | goto leave_nomem; | 1432 | goto leave_nomem; |
1433 | |||
1434 | scrub_get_recover(recover); | ||
1435 | page->recover = recover; | ||
1286 | } | 1436 | } |
1287 | kfree(bbio); | 1437 | scrub_put_recover(recover); |
1288 | length -= sublen; | 1438 | length -= sublen; |
1289 | logical += sublen; | 1439 | logical += sublen; |
1290 | page_index++; | 1440 | page_index++; |
@@ -1293,6 +1443,51 @@ leave_nomem: | |||
1293 | return 0; | 1443 | return 0; |
1294 | } | 1444 | } |
1295 | 1445 | ||
1446 | struct scrub_bio_ret { | ||
1447 | struct completion event; | ||
1448 | int error; | ||
1449 | }; | ||
1450 | |||
1451 | static void scrub_bio_wait_endio(struct bio *bio, int error) | ||
1452 | { | ||
1453 | struct scrub_bio_ret *ret = bio->bi_private; | ||
1454 | |||
1455 | ret->error = error; | ||
1456 | complete(&ret->event); | ||
1457 | } | ||
1458 | |||
1459 | static inline int scrub_is_page_on_raid56(struct scrub_page *page) | ||
1460 | { | ||
1461 | return page->recover && page->recover->raid_map; | ||
1462 | } | ||
1463 | |||
1464 | static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, | ||
1465 | struct bio *bio, | ||
1466 | struct scrub_page *page) | ||
1467 | { | ||
1468 | struct scrub_bio_ret done; | ||
1469 | int ret; | ||
1470 | |||
1471 | init_completion(&done.event); | ||
1472 | done.error = 0; | ||
1473 | bio->bi_iter.bi_sector = page->logical >> 9; | ||
1474 | bio->bi_private = &done; | ||
1475 | bio->bi_end_io = scrub_bio_wait_endio; | ||
1476 | |||
1477 | ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, | ||
1478 | page->recover->raid_map, | ||
1479 | page->recover->map_length, | ||
1480 | page->mirror_num, 0); | ||
1481 | if (ret) | ||
1482 | return ret; | ||
1483 | |||
1484 | wait_for_completion(&done.event); | ||
1485 | if (done.error) | ||
1486 | return -EIO; | ||
1487 | |||
1488 | return 0; | ||
1489 | } | ||
1490 | |||
1296 | /* | 1491 | /* |
1297 | * this function will check the on disk data for checksum errors, header | 1492 | * this function will check the on disk data for checksum errors, header |
1298 | * errors and read I/O errors. If any I/O errors happen, the exact pages | 1493 | * errors and read I/O errors. If any I/O errors happen, the exact pages |
@@ -1303,7 +1498,7 @@ leave_nomem: | |||
1303 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 1498 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
1304 | struct scrub_block *sblock, int is_metadata, | 1499 | struct scrub_block *sblock, int is_metadata, |
1305 | int have_csum, u8 *csum, u64 generation, | 1500 | int have_csum, u8 *csum, u64 generation, |
1306 | u16 csum_size) | 1501 | u16 csum_size, int retry_failed_mirror) |
1307 | { | 1502 | { |
1308 | int page_num; | 1503 | int page_num; |
1309 | 1504 | ||
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1329 | continue; | 1524 | continue; |
1330 | } | 1525 | } |
1331 | bio->bi_bdev = page->dev->bdev; | 1526 | bio->bi_bdev = page->dev->bdev; |
1332 | bio->bi_iter.bi_sector = page->physical >> 9; | ||
1333 | 1527 | ||
1334 | bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1528 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
1335 | if (btrfsic_submit_bio_wait(READ, bio)) | 1529 | if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { |
1336 | sblock->no_io_error_seen = 0; | 1530 | if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) |
1531 | sblock->no_io_error_seen = 0; | ||
1532 | } else { | ||
1533 | bio->bi_iter.bi_sector = page->physical >> 9; | ||
1534 | |||
1535 | if (btrfsic_submit_bio_wait(READ, bio)) | ||
1536 | sblock->no_io_error_seen = 0; | ||
1537 | } | ||
1337 | 1538 | ||
1338 | bio_put(bio); | 1539 | bio_put(bio); |
1339 | } | 1540 | } |
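
In the loop just above, a page that lives on a RAID5/6 stripe is re-read through scrub_submit_raid56_bio_wait(), which turns the asynchronous raid56 recovery path into a plain synchronous read: the end_io callback records the status and fires a completion that the submitter is parked on. A rough userspace analogue of that submit-and-wait pattern, using a pthread condition variable in place of the kernel completion (all names here are invented, nothing is kernel API):

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's struct completion plus status. */
struct bio_ret {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int done;
        int error;
};

/* Plays the role of scrub_bio_wait_endio(): the async I/O path calls
 * this when the (simulated) read finishes. */
static void bio_endio(struct bio_ret *ret, int error)
{
        pthread_mutex_lock(&ret->lock);
        ret->error = error;
        ret->done = 1;
        pthread_cond_signal(&ret->cond);
        pthread_mutex_unlock(&ret->lock);
}

/* Simulated asynchronous submission (raid56_parity_recover in the
 * kernel); here it simply completes from another thread. */
static void *async_read(void *arg)
{
        bio_endio(arg, 0);      /* pretend the rebuilt read succeeded */
        return NULL;
}

/* Plays the role of scrub_submit_raid56_bio_wait(): submit, block until
 * the completion fires, then turn the callback status into a return value. */
static int submit_and_wait(void)
{
        struct bio_ret done = {
                PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
        };
        pthread_t t;

        pthread_create(&t, NULL, async_read, &done);

        pthread_mutex_lock(&done.lock);
        while (!done.done)
                pthread_cond_wait(&done.cond, &done.lock);
        pthread_mutex_unlock(&done.lock);
        pthread_join(t, NULL);

        return done.error ? -5 /* -EIO */ : 0;
}

int main(void)
{
        printf("read status: %d\n", submit_and_wait());
        return 0;
}
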
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) | |||
1486 | { | 1687 | { |
1487 | int page_num; | 1688 | int page_num; |
1488 | 1689 | ||
1690 | /* | ||
1691 | * This block is only used to check the parity on the source device, | ||
1692 | * so the data doesn't need to be written to the destination device. | ||
1693 | */ | ||
1694 | if (sblock->sparity) | ||
1695 | return; | ||
1696 | |||
1489 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | 1697 | for (page_num = 0; page_num < sblock->page_count; page_num++) { |
1490 | int ret; | 1698 | int ret; |
1491 | 1699 | ||
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock) | |||
1867 | if (atomic_dec_and_test(&sblock->ref_count)) { | 2075 | if (atomic_dec_and_test(&sblock->ref_count)) { |
1868 | int i; | 2076 | int i; |
1869 | 2077 | ||
2078 | if (sblock->sparity) | ||
2079 | scrub_parity_put(sblock->sparity); | ||
2080 | |||
1870 | for (i = 0; i < sblock->page_count; i++) | 2081 | for (i = 0; i < sblock->page_count; i++) |
1871 | scrub_page_put(sblock->pagev[i]); | 2082 | scrub_page_put(sblock->pagev[i]); |
1872 | kfree(sblock); | 2083 | kfree(sblock); |
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
2124 | scrub_pending_bio_dec(sctx); | 2335 | scrub_pending_bio_dec(sctx); |
2125 | } | 2336 | } |
2126 | 2337 | ||
2338 | static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, | ||
2339 | unsigned long *bitmap, | ||
2340 | u64 start, u64 len) | ||
2341 | { | ||
2342 | int offset; | ||
2343 | int nsectors; | ||
2344 | int sectorsize = sparity->sctx->dev_root->sectorsize; | ||
2345 | |||
2346 | if (len >= sparity->stripe_len) { | ||
2347 | bitmap_set(bitmap, 0, sparity->nsectors); | ||
2348 | return; | ||
2349 | } | ||
2350 | |||
2351 | start -= sparity->logic_start; | ||
2352 | offset = (int)do_div(start, sparity->stripe_len); | ||
2353 | offset /= sectorsize; | ||
2354 | nsectors = (int)len / sectorsize; | ||
2355 | |||
2356 | if (offset + nsectors <= sparity->nsectors) { | ||
2357 | bitmap_set(bitmap, offset, nsectors); | ||
2358 | return; | ||
2359 | } | ||
2360 | |||
2361 | bitmap_set(bitmap, offset, sparity->nsectors - offset); | ||
2362 | bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset)); | ||
2363 | } | ||
2364 | |||
2365 | static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity, | ||
2366 | u64 start, u64 len) | ||
2367 | { | ||
2368 | __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len); | ||
2369 | } | ||
2370 | |||
2371 | static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity, | ||
2372 | u64 start, u64 len) | ||
2373 | { | ||
2374 | __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len); | ||
2375 | } | ||
2376 | |||
2127 | static void scrub_block_complete(struct scrub_block *sblock) | 2377 | static void scrub_block_complete(struct scrub_block *sblock) |
2128 | { | 2378 | { |
2379 | int corrupted = 0; | ||
2380 | |||
2129 | if (!sblock->no_io_error_seen) { | 2381 | if (!sblock->no_io_error_seen) { |
2382 | corrupted = 1; | ||
2130 | scrub_handle_errored_block(sblock); | 2383 | scrub_handle_errored_block(sblock); |
2131 | } else { | 2384 | } else { |
2132 | /* | 2385 | /* |
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock) | |||
2134 | * dev replace case, otherwise write here in dev replace | 2387 | * dev replace case, otherwise write here in dev replace |
2135 | * case. | 2388 | * case. |
2136 | */ | 2389 | */ |
2137 | if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) | 2390 | corrupted = scrub_checksum(sblock); |
2391 | if (!corrupted && sblock->sctx->is_dev_replace) | ||
2138 | scrub_write_block_to_dev_replace(sblock); | 2392 | scrub_write_block_to_dev_replace(sblock); |
2139 | } | 2393 | } |
2394 | |||
2395 | if (sblock->sparity && corrupted && !sblock->data_corrected) { | ||
2396 | u64 start = sblock->pagev[0]->logical; | ||
2397 | u64 end = sblock->pagev[sblock->page_count - 1]->logical + | ||
2398 | PAGE_SIZE; | ||
2399 | |||
2400 | scrub_parity_mark_sectors_error(sblock->sparity, | ||
2401 | start, end - start); | ||
2402 | } | ||
2140 | } | 2403 | } |
2141 | 2404 | ||
2142 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | 2405 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, |
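
__scrub_mark_bitmap() above translates a logical byte range into sector bits within a single stripe: the start is taken modulo the stripe length, so a range that begins near the end of the stripe wraps around to bit 0, which is why there are two bitmap_set() calls. A small standalone sketch of that wrap-around, assuming a 64K stripe of 4K sectors purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define STRIPE_LEN 65536u              /* one stripe, as in map->stripe_len */
#define SECTORSIZE 4096u
#define NSECTORS   (STRIPE_LEN / SECTORSIZE)   /* 16 bits in the bitmap */

/* Set 'nsectors' bits starting at 'offset', wrapping at NSECTORS,
 * mirroring the two bitmap_set() calls in __scrub_mark_bitmap(). */
static void mark_bitmap(uint32_t *bitmap, uint64_t start, uint64_t len,
                        uint64_t logic_start)
{
        unsigned int offset, nsectors, i;

        if (len >= STRIPE_LEN) {        /* whole stripe is covered */
                *bitmap |= (1u << NSECTORS) - 1;
                return;
        }

        offset = (unsigned int)((start - logic_start) % STRIPE_LEN) / SECTORSIZE;
        nsectors = (unsigned int)(len / SECTORSIZE);

        for (i = 0; i < nsectors; i++)
                *bitmap |= 1u << ((offset + i) % NSECTORS);
}

int main(void)
{
        uint32_t dbitmap = 0;

        /* 3 sectors starting 2 sectors before the end of the stripe:
         * bits 14, 15 and then 0 get set. */
        mark_bitmap(&dbitmap, 1048576 + STRIPE_LEN - 2 * SECTORSIZE,
                    3 * SECTORSIZE, 1048576);
        printf("dbitmap = 0x%04x\n", (unsigned int)dbitmap);   /* 0xc001 */
        return 0;
}
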
@@ -2228,6 +2491,132 @@ behind_scrub_pages: | |||
2228 | return 0; | 2491 | return 0; |
2229 | } | 2492 | } |
2230 | 2493 | ||
2494 | static int scrub_pages_for_parity(struct scrub_parity *sparity, | ||
2495 | u64 logical, u64 len, | ||
2496 | u64 physical, struct btrfs_device *dev, | ||
2497 | u64 flags, u64 gen, int mirror_num, u8 *csum) | ||
2498 | { | ||
2499 | struct scrub_ctx *sctx = sparity->sctx; | ||
2500 | struct scrub_block *sblock; | ||
2501 | int index; | ||
2502 | |||
2503 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | ||
2504 | if (!sblock) { | ||
2505 | spin_lock(&sctx->stat_lock); | ||
2506 | sctx->stat.malloc_errors++; | ||
2507 | spin_unlock(&sctx->stat_lock); | ||
2508 | return -ENOMEM; | ||
2509 | } | ||
2510 | |||
2511 | /* one ref inside this function, plus one for each page added to | ||
2512 | * a bio later on */ | ||
2513 | atomic_set(&sblock->ref_count, 1); | ||
2514 | sblock->sctx = sctx; | ||
2515 | sblock->no_io_error_seen = 1; | ||
2516 | sblock->sparity = sparity; | ||
2517 | scrub_parity_get(sparity); | ||
2518 | |||
2519 | for (index = 0; len > 0; index++) { | ||
2520 | struct scrub_page *spage; | ||
2521 | u64 l = min_t(u64, len, PAGE_SIZE); | ||
2522 | |||
2523 | spage = kzalloc(sizeof(*spage), GFP_NOFS); | ||
2524 | if (!spage) { | ||
2525 | leave_nomem: | ||
2526 | spin_lock(&sctx->stat_lock); | ||
2527 | sctx->stat.malloc_errors++; | ||
2528 | spin_unlock(&sctx->stat_lock); | ||
2529 | scrub_block_put(sblock); | ||
2530 | return -ENOMEM; | ||
2531 | } | ||
2532 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | ||
2533 | /* For scrub block */ | ||
2534 | scrub_page_get(spage); | ||
2535 | sblock->pagev[index] = spage; | ||
2536 | /* For scrub parity */ | ||
2537 | scrub_page_get(spage); | ||
2538 | list_add_tail(&spage->list, &sparity->spages); | ||
2539 | spage->sblock = sblock; | ||
2540 | spage->dev = dev; | ||
2541 | spage->flags = flags; | ||
2542 | spage->generation = gen; | ||
2543 | spage->logical = logical; | ||
2544 | spage->physical = physical; | ||
2545 | spage->mirror_num = mirror_num; | ||
2546 | if (csum) { | ||
2547 | spage->have_csum = 1; | ||
2548 | memcpy(spage->csum, csum, sctx->csum_size); | ||
2549 | } else { | ||
2550 | spage->have_csum = 0; | ||
2551 | } | ||
2552 | sblock->page_count++; | ||
2553 | spage->page = alloc_page(GFP_NOFS); | ||
2554 | if (!spage->page) | ||
2555 | goto leave_nomem; | ||
2556 | len -= l; | ||
2557 | logical += l; | ||
2558 | physical += l; | ||
2559 | } | ||
2560 | |||
2561 | WARN_ON(sblock->page_count == 0); | ||
2562 | for (index = 0; index < sblock->page_count; index++) { | ||
2563 | struct scrub_page *spage = sblock->pagev[index]; | ||
2564 | int ret; | ||
2565 | |||
2566 | ret = scrub_add_page_to_rd_bio(sctx, spage); | ||
2567 | if (ret) { | ||
2568 | scrub_block_put(sblock); | ||
2569 | return ret; | ||
2570 | } | ||
2571 | } | ||
2572 | |||
2573 | /* last one frees, either here or in bio completion for last page */ | ||
2574 | scrub_block_put(sblock); | ||
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | static int scrub_extent_for_parity(struct scrub_parity *sparity, | ||
2579 | u64 logical, u64 len, | ||
2580 | u64 physical, struct btrfs_device *dev, | ||
2581 | u64 flags, u64 gen, int mirror_num) | ||
2582 | { | ||
2583 | struct scrub_ctx *sctx = sparity->sctx; | ||
2584 | int ret; | ||
2585 | u8 csum[BTRFS_CSUM_SIZE]; | ||
2586 | u32 blocksize; | ||
2587 | |||
2588 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
2589 | blocksize = sctx->sectorsize; | ||
2590 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
2591 | blocksize = sctx->nodesize; | ||
2592 | } else { | ||
2593 | blocksize = sctx->sectorsize; | ||
2594 | WARN_ON(1); | ||
2595 | } | ||
2596 | |||
2597 | while (len) { | ||
2598 | u64 l = min_t(u64, len, blocksize); | ||
2599 | int have_csum = 0; | ||
2600 | |||
2601 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
2602 | /* push csums to sbio */ | ||
2603 | have_csum = scrub_find_csum(sctx, logical, l, csum); | ||
2604 | if (have_csum == 0) | ||
2605 | goto skip; | ||
2606 | } | ||
2607 | ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, | ||
2608 | flags, gen, mirror_num, | ||
2609 | have_csum ? csum : NULL); | ||
2610 | skip: | ||
2611 | if (ret) | ||
2612 | return ret; | ||
2613 | len -= l; | ||
2614 | logical += l; | ||
2615 | physical += l; | ||
2616 | } | ||
2617 | return 0; | ||
2618 | } | ||
2619 | |||
2231 | /* | 2620 | /* |
2232 | * Given a physical address, this will calculate its | 2621 | * Given a physical address, this will calculate its |
2233 | * logical offset. If this is a parity stripe, it will return | 2622 | * logical offset. If this is a parity stripe, it will return |
@@ -2236,7 +2625,8 @@ behind_scrub_pages: | |||
2236 | * return 0 if it is a data stripe, 1 if it is a parity stripe. | 2625 | * return 0 if it is a data stripe, 1 if it is a parity stripe. |
2237 | */ | 2626 | */ |
2238 | static int get_raid56_logic_offset(u64 physical, int num, | 2627 | static int get_raid56_logic_offset(u64 physical, int num, |
2239 | struct map_lookup *map, u64 *offset) | 2628 | struct map_lookup *map, u64 *offset, |
2629 | u64 *stripe_start) | ||
2240 | { | 2630 | { |
2241 | int i; | 2631 | int i; |
2242 | int j = 0; | 2632 | int j = 0; |
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2247 | 2637 | ||
2248 | last_offset = (physical - map->stripes[num].physical) * | 2638 | last_offset = (physical - map->stripes[num].physical) * |
2249 | nr_data_stripes(map); | 2639 | nr_data_stripes(map); |
2640 | if (stripe_start) | ||
2641 | *stripe_start = last_offset; | ||
2642 | |||
2250 | *offset = last_offset; | 2643 | *offset = last_offset; |
2251 | for (i = 0; i < nr_data_stripes(map); i++) { | 2644 | for (i = 0; i < nr_data_stripes(map); i++) { |
2252 | *offset = last_offset + i * map->stripe_len; | 2645 | *offset = last_offset + i * map->stripe_len; |
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2269 | return 1; | 2662 | return 1; |
2270 | } | 2663 | } |
2271 | 2664 | ||
2665 | static void scrub_free_parity(struct scrub_parity *sparity) | ||
2666 | { | ||
2667 | struct scrub_ctx *sctx = sparity->sctx; | ||
2668 | struct scrub_page *curr, *next; | ||
2669 | int nbits; | ||
2670 | |||
2671 | nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors); | ||
2672 | if (nbits) { | ||
2673 | spin_lock(&sctx->stat_lock); | ||
2674 | sctx->stat.read_errors += nbits; | ||
2675 | sctx->stat.uncorrectable_errors += nbits; | ||
2676 | spin_unlock(&sctx->stat_lock); | ||
2677 | } | ||
2678 | |||
2679 | list_for_each_entry_safe(curr, next, &sparity->spages, list) { | ||
2680 | list_del_init(&curr->list); | ||
2681 | scrub_page_put(curr); | ||
2682 | } | ||
2683 | |||
2684 | kfree(sparity); | ||
2685 | } | ||
2686 | |||
2687 | static void scrub_parity_bio_endio(struct bio *bio, int error) | ||
2688 | { | ||
2689 | struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; | ||
2690 | struct scrub_ctx *sctx = sparity->sctx; | ||
2691 | |||
2692 | if (error) | ||
2693 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, | ||
2694 | sparity->nsectors); | ||
2695 | |||
2696 | scrub_free_parity(sparity); | ||
2697 | scrub_pending_bio_dec(sctx); | ||
2698 | bio_put(bio); | ||
2699 | } | ||
2700 | |||
2701 | static void scrub_parity_check_and_repair(struct scrub_parity *sparity) | ||
2702 | { | ||
2703 | struct scrub_ctx *sctx = sparity->sctx; | ||
2704 | struct bio *bio; | ||
2705 | struct btrfs_raid_bio *rbio; | ||
2706 | struct scrub_page *spage; | ||
2707 | struct btrfs_bio *bbio = NULL; | ||
2708 | u64 *raid_map = NULL; | ||
2709 | u64 length; | ||
2710 | int ret; | ||
2711 | |||
2712 | if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap, | ||
2713 | sparity->nsectors)) | ||
2714 | goto out; | ||
2715 | |||
2716 | length = sparity->logic_end - sparity->logic_start + 1; | ||
2717 | ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, | ||
2718 | sparity->logic_start, | ||
2719 | &length, &bbio, 0, &raid_map); | ||
2720 | if (ret || !bbio || !raid_map) | ||
2721 | goto bbio_out; | ||
2722 | |||
2723 | bio = btrfs_io_bio_alloc(GFP_NOFS, 0); | ||
2724 | if (!bio) | ||
2725 | goto bbio_out; | ||
2726 | |||
2727 | bio->bi_iter.bi_sector = sparity->logic_start >> 9; | ||
2728 | bio->bi_private = sparity; | ||
2729 | bio->bi_end_io = scrub_parity_bio_endio; | ||
2730 | |||
2731 | rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, | ||
2732 | raid_map, length, | ||
2733 | sparity->scrub_dev, | ||
2734 | sparity->dbitmap, | ||
2735 | sparity->nsectors); | ||
2736 | if (!rbio) | ||
2737 | goto rbio_out; | ||
2738 | |||
2739 | list_for_each_entry(spage, &sparity->spages, list) | ||
2740 | raid56_parity_add_scrub_pages(rbio, spage->page, | ||
2741 | spage->logical); | ||
2742 | |||
2743 | scrub_pending_bio_inc(sctx); | ||
2744 | raid56_parity_submit_scrub_rbio(rbio); | ||
2745 | return; | ||
2746 | |||
2747 | rbio_out: | ||
2748 | bio_put(bio); | ||
2749 | bbio_out: | ||
2750 | kfree(bbio); | ||
2751 | kfree(raid_map); | ||
2752 | bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, | ||
2753 | sparity->nsectors); | ||
2754 | spin_lock(&sctx->stat_lock); | ||
2755 | sctx->stat.malloc_errors++; | ||
2756 | spin_unlock(&sctx->stat_lock); | ||
2757 | out: | ||
2758 | scrub_free_parity(sparity); | ||
2759 | } | ||
2760 | |||
2761 | static inline int scrub_calc_parity_bitmap_len(int nsectors) | ||
2762 | { | ||
2763 | return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8); | ||
2764 | } | ||
2765 | |||
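For reference, scrub_calc_parity_bitmap_len() above just rounds the per-stripe sector count up to a whole number of unsigned longs and converts that to bytes, so that two such bitmaps (dbitmap and ebitmap) can later be carved out of a single allocation. A minimal userspace sketch of the same arithmetic; the 64-bit BITS_PER_LONG value is an assumption made only for this example:

#include <stdio.h>

#define BITS_PER_LONG 64			/* assumption: 64-bit build */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Same formula as scrub_calc_parity_bitmap_len(): bytes needed for a
 * bitmap of nsectors bits, padded out to whole unsigned longs. */
static int parity_bitmap_len(int nsectors)
{
	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
}

int main(void)
{
	printf("%d\n", parity_bitmap_len(16));	/* 64K stripe, 4K sectors: 8 bytes (1 long)  */
	printf("%d\n", parity_bitmap_len(128));	/* 128 sectors: 16 bytes (2 longs)           */
	return 0;
}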
2766 | static void scrub_parity_get(struct scrub_parity *sparity) | ||
2767 | { | ||
2768 | atomic_inc(&sparity->ref_count); | ||
2769 | } | ||
2770 | |||
2771 | static void scrub_parity_put(struct scrub_parity *sparity) | ||
2772 | { | ||
2773 | if (!atomic_dec_and_test(&sparity->ref_count)) | ||
2774 | return; | ||
2775 | |||
2776 | scrub_parity_check_and_repair(sparity); | ||
2777 | } | ||
2778 | |||
2779 | static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, | ||
2780 | struct map_lookup *map, | ||
2781 | struct btrfs_device *sdev, | ||
2782 | struct btrfs_path *path, | ||
2783 | u64 logic_start, | ||
2784 | u64 logic_end) | ||
2785 | { | ||
2786 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
2787 | struct btrfs_root *root = fs_info->extent_root; | ||
2788 | struct btrfs_root *csum_root = fs_info->csum_root; | ||
2789 | struct btrfs_extent_item *extent; | ||
2790 | u64 flags; | ||
2791 | int ret; | ||
2792 | int slot; | ||
2793 | struct extent_buffer *l; | ||
2794 | struct btrfs_key key; | ||
2795 | u64 generation; | ||
2796 | u64 extent_logical; | ||
2797 | u64 extent_physical; | ||
2798 | u64 extent_len; | ||
2799 | struct btrfs_device *extent_dev; | ||
2800 | struct scrub_parity *sparity; | ||
2801 | int nsectors; | ||
2802 | int bitmap_len; | ||
2803 | int extent_mirror_num; | ||
2804 | int stop_loop = 0; | ||
2805 | |||
2806 | nsectors = map->stripe_len / root->sectorsize; | ||
2807 | bitmap_len = scrub_calc_parity_bitmap_len(nsectors); | ||
2808 | sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, | ||
2809 | GFP_NOFS); | ||
2810 | if (!sparity) { | ||
2811 | spin_lock(&sctx->stat_lock); | ||
2812 | sctx->stat.malloc_errors++; | ||
2813 | spin_unlock(&sctx->stat_lock); | ||
2814 | return -ENOMEM; | ||
2815 | } | ||
2816 | |||
2817 | sparity->stripe_len = map->stripe_len; | ||
2818 | sparity->nsectors = nsectors; | ||
2819 | sparity->sctx = sctx; | ||
2820 | sparity->scrub_dev = sdev; | ||
2821 | sparity->logic_start = logic_start; | ||
2822 | sparity->logic_end = logic_end; | ||
2823 | atomic_set(&sparity->ref_count, 1); | ||
2824 | INIT_LIST_HEAD(&sparity->spages); | ||
2825 | sparity->dbitmap = sparity->bitmap; | ||
2826 | sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; | ||
2827 | |||
2828 | ret = 0; | ||
2829 | while (logic_start < logic_end) { | ||
2830 | if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) | ||
2831 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
2832 | else | ||
2833 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2834 | key.objectid = logic_start; | ||
2835 | key.offset = (u64)-1; | ||
2836 | |||
2837 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
2838 | if (ret < 0) | ||
2839 | goto out; | ||
2840 | |||
2841 | if (ret > 0) { | ||
2842 | ret = btrfs_previous_extent_item(root, path, 0); | ||
2843 | if (ret < 0) | ||
2844 | goto out; | ||
2845 | if (ret > 0) { | ||
2846 | btrfs_release_path(path); | ||
2847 | ret = btrfs_search_slot(NULL, root, &key, | ||
2848 | path, 0, 0); | ||
2849 | if (ret < 0) | ||
2850 | goto out; | ||
2851 | } | ||
2852 | } | ||
2853 | |||
2854 | stop_loop = 0; | ||
2855 | while (1) { | ||
2856 | u64 bytes; | ||
2857 | |||
2858 | l = path->nodes[0]; | ||
2859 | slot = path->slots[0]; | ||
2860 | if (slot >= btrfs_header_nritems(l)) { | ||
2861 | ret = btrfs_next_leaf(root, path); | ||
2862 | if (ret == 0) | ||
2863 | continue; | ||
2864 | if (ret < 0) | ||
2865 | goto out; | ||
2866 | |||
2867 | stop_loop = 1; | ||
2868 | break; | ||
2869 | } | ||
2870 | btrfs_item_key_to_cpu(l, &key, slot); | ||
2871 | |||
2872 | if (key.type == BTRFS_METADATA_ITEM_KEY) | ||
2873 | bytes = root->nodesize; | ||
2874 | else | ||
2875 | bytes = key.offset; | ||
2876 | |||
2877 | if (key.objectid + bytes <= logic_start) | ||
2878 | goto next; | ||
2879 | |||
2880 | if (key.type != BTRFS_EXTENT_ITEM_KEY && | ||
2881 | key.type != BTRFS_METADATA_ITEM_KEY) | ||
2882 | goto next; | ||
2883 | |||
2884 | if (key.objectid > logic_end) { | ||
2885 | stop_loop = 1; | ||
2886 | break; | ||
2887 | } | ||
2888 | |||
2889 | while (key.objectid >= logic_start + map->stripe_len) | ||
2890 | logic_start += map->stripe_len; | ||
2891 | |||
2892 | extent = btrfs_item_ptr(l, slot, | ||
2893 | struct btrfs_extent_item); | ||
2894 | flags = btrfs_extent_flags(l, extent); | ||
2895 | generation = btrfs_extent_generation(l, extent); | ||
2896 | |||
2897 | if (key.objectid < logic_start && | ||
2898 | (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { | ||
2899 | btrfs_err(fs_info, | ||
2900 | "scrub: tree block %llu spanning stripes, ignored. logical=%llu", | ||
2901 | key.objectid, logic_start); | ||
2902 | goto next; | ||
2903 | } | ||
2904 | again: | ||
2905 | extent_logical = key.objectid; | ||
2906 | extent_len = bytes; | ||
2907 | |||
2908 | if (extent_logical < logic_start) { | ||
2909 | extent_len -= logic_start - extent_logical; | ||
2910 | extent_logical = logic_start; | ||
2911 | } | ||
2912 | |||
2913 | if (extent_logical + extent_len > | ||
2914 | logic_start + map->stripe_len) | ||
2915 | extent_len = logic_start + map->stripe_len - | ||
2916 | extent_logical; | ||
2917 | |||
2918 | scrub_parity_mark_sectors_data(sparity, extent_logical, | ||
2919 | extent_len); | ||
2920 | |||
2921 | scrub_remap_extent(fs_info, extent_logical, | ||
2922 | extent_len, &extent_physical, | ||
2923 | &extent_dev, | ||
2924 | &extent_mirror_num); | ||
2925 | |||
2926 | ret = btrfs_lookup_csums_range(csum_root, | ||
2927 | extent_logical, | ||
2928 | extent_logical + extent_len - 1, | ||
2929 | &sctx->csum_list, 1); | ||
2930 | if (ret) | ||
2931 | goto out; | ||
2932 | |||
2933 | ret = scrub_extent_for_parity(sparity, extent_logical, | ||
2934 | extent_len, | ||
2935 | extent_physical, | ||
2936 | extent_dev, flags, | ||
2937 | generation, | ||
2938 | extent_mirror_num); | ||
2939 | if (ret) | ||
2940 | goto out; | ||
2941 | |||
2942 | scrub_free_csums(sctx); | ||
2943 | if (extent_logical + extent_len < | ||
2944 | key.objectid + bytes) { | ||
2945 | logic_start += map->stripe_len; | ||
2946 | |||
2947 | if (logic_start >= logic_end) { | ||
2948 | stop_loop = 1; | ||
2949 | break; | ||
2950 | } | ||
2951 | |||
2952 | if (logic_start < key.objectid + bytes) { | ||
2953 | cond_resched(); | ||
2954 | goto again; | ||
2955 | } | ||
2956 | } | ||
2957 | next: | ||
2958 | path->slots[0]++; | ||
2959 | } | ||
2960 | |||
2961 | btrfs_release_path(path); | ||
2962 | |||
2963 | if (stop_loop) | ||
2964 | break; | ||
2965 | |||
2966 | logic_start += map->stripe_len; | ||
2967 | } | ||
2968 | out: | ||
2969 | if (ret < 0) | ||
2970 | scrub_parity_mark_sectors_error(sparity, logic_start, | ||
2971 | logic_end - logic_start + 1); | ||
2972 | scrub_parity_put(sparity); | ||
2973 | scrub_submit(sctx); | ||
2974 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2975 | scrub_wr_submit(sctx); | ||
2976 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2977 | |||
2978 | btrfs_release_path(path); | ||
2979 | return ret < 0 ? ret : 0; | ||
2980 | } | ||
2981 | |||
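Worth noting about scrub_raid56_parity() above: the parity context and both per-stripe bitmaps come from one kzalloc(), with dbitmap pointing at the start of the trailing bitmap area and ebitmap at its second half, and the whole object is reference counted so that the final scrub_parity_put() is what kicks off scrub_parity_check_and_repair(). A small standalone sketch of that single-allocation layout; the structure below is a reduced stand-in that keeps only the fields this diff touches, not the real struct scrub_parity definition:

#include <stdlib.h>

/* Reduced stand-in for struct scrub_parity: only the members involved in
 * the layout trick are shown; the real structure carries much more state. */
struct parity_demo {
	int		nsectors;
	unsigned long	*dbitmap;	/* data sectors that still need checking */
	unsigned long	*ebitmap;	/* sectors that hit a read error          */
	unsigned long	bitmap[];	/* both bitmaps live in this tail         */
};

static struct parity_demo *parity_alloc(int nsectors, int bitmap_len)
{
	/* one allocation: the struct plus room for two bitmaps */
	struct parity_demo *sp = calloc(1, sizeof(*sp) + 2 * bitmap_len);

	if (!sp)
		return NULL;
	sp->nsectors = nsectors;
	sp->dbitmap = sp->bitmap;
	sp->ebitmap = (unsigned long *)((char *)sp->bitmap + bitmap_len);
	return sp;
}

int main(void)
{
	struct parity_demo *sp = parity_alloc(16, 8);

	free(sp);
	return 0;
}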
2272 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | 2982 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
2273 | struct map_lookup *map, | 2983 | struct map_lookup *map, |
2274 | struct btrfs_device *scrub_dev, | 2984 | struct btrfs_device *scrub_dev, |
2275 | int num, u64 base, u64 length, | 2985 | int num, u64 base, u64 length, |
2276 | int is_dev_replace) | 2986 | int is_dev_replace) |
2277 | { | 2987 | { |
2278 | struct btrfs_path *path; | 2988 | struct btrfs_path *path, *ppath; |
2279 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | 2989 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
2280 | struct btrfs_root *root = fs_info->extent_root; | 2990 | struct btrfs_root *root = fs_info->extent_root; |
2281 | struct btrfs_root *csum_root = fs_info->csum_root; | 2991 | struct btrfs_root *csum_root = fs_info->csum_root; |
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2302 | u64 extent_logical; | 3012 | u64 extent_logical; |
2303 | u64 extent_physical; | 3013 | u64 extent_physical; |
2304 | u64 extent_len; | 3014 | u64 extent_len; |
3015 | u64 stripe_logical; | ||
3016 | u64 stripe_end; | ||
2305 | struct btrfs_device *extent_dev; | 3017 | struct btrfs_device *extent_dev; |
2306 | int extent_mirror_num; | 3018 | int extent_mirror_num; |
2307 | int stop_loop = 0; | 3019 | int stop_loop = 0; |
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2327 | mirror_num = num % map->num_stripes + 1; | 3039 | mirror_num = num % map->num_stripes + 1; |
2328 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3040 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2329 | BTRFS_BLOCK_GROUP_RAID6)) { | 3041 | BTRFS_BLOCK_GROUP_RAID6)) { |
2330 | get_raid56_logic_offset(physical, num, map, &offset); | 3042 | get_raid56_logic_offset(physical, num, map, &offset, NULL); |
2331 | increment = map->stripe_len * nr_data_stripes(map); | 3043 | increment = map->stripe_len * nr_data_stripes(map); |
2332 | mirror_num = 1; | 3044 | mirror_num = 1; |
2333 | } else { | 3045 | } else { |
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2339 | if (!path) | 3051 | if (!path) |
2340 | return -ENOMEM; | 3052 | return -ENOMEM; |
2341 | 3053 | ||
3054 | ppath = btrfs_alloc_path(); | ||
3055 | if (!ppath) { | ||
3056 | btrfs_free_path(path); | ||
3057 | return -ENOMEM; | ||
3058 | } | ||
3059 | |||
2342 | /* | 3060 | /* |
2343 | * work on commit root. The related disk blocks are static as | 3061 | * work on commit root. The related disk blocks are static as |
2344 | * long as COW is applied. This means, it is safe to rewrite | 3062 | * long as COW is applied. This means, it is safe to rewrite |
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2357 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3075 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2358 | BTRFS_BLOCK_GROUP_RAID6)) { | 3076 | BTRFS_BLOCK_GROUP_RAID6)) { |
2359 | get_raid56_logic_offset(physical_end, num, | 3077 | get_raid56_logic_offset(physical_end, num, |
2360 | map, &logic_end); | 3078 | map, &logic_end, NULL); |
2361 | logic_end += base; | 3079 | logic_end += base; |
2362 | } else { | 3080 | } else { |
2363 | logic_end = logical + increment * nstripes; | 3081 | logic_end = logical + increment * nstripes; |
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2404 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | 3122 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
2405 | BTRFS_BLOCK_GROUP_RAID6)) { | 3123 | BTRFS_BLOCK_GROUP_RAID6)) { |
2406 | ret = get_raid56_logic_offset(physical, num, | 3124 | ret = get_raid56_logic_offset(physical, num, |
2407 | map, &logical); | 3125 | map, &logical, &stripe_logical); |
2408 | logical += base; | 3126 | logical += base; |
2409 | if (ret) | 3127 | if (ret) { |
3128 | stripe_logical += base; | ||
3129 | stripe_end = stripe_logical + increment - 1; | ||
3130 | ret = scrub_raid56_parity(sctx, map, scrub_dev, | ||
3131 | ppath, stripe_logical, | ||
3132 | stripe_end); | ||
3133 | if (ret) | ||
3134 | goto out; | ||
2410 | goto skip; | 3135 | goto skip; |
3136 | } | ||
2411 | } | 3137 | } |
2412 | /* | 3138 | /* |
2413 | * canceled? | 3139 | * canceled? |
@@ -2558,13 +3284,25 @@ again: | |||
2558 | * loop until we find next data stripe | 3284 | * loop until we find next data stripe |
2559 | * or we have finished all stripes. | 3285 | * or we have finished all stripes. |
2560 | */ | 3286 | */ |
2561 | do { | 3287 | loop: |
2562 | physical += map->stripe_len; | 3288 | physical += map->stripe_len; |
2563 | ret = get_raid56_logic_offset( | 3289 | ret = get_raid56_logic_offset(physical, |
2564 | physical, num, | 3290 | num, map, &logical, |
2565 | map, &logical); | 3291 | &stripe_logical); |
2566 | logical += base; | 3292 | logical += base; |
2567 | } while (physical < physical_end && ret); | 3293 | |
3294 | if (ret && physical < physical_end) { | ||
3295 | stripe_logical += base; | ||
3296 | stripe_end = stripe_logical + | ||
3297 | increment - 1; | ||
3298 | ret = scrub_raid56_parity(sctx, | ||
3299 | map, scrub_dev, ppath, | ||
3300 | stripe_logical, | ||
3301 | stripe_end); | ||
3302 | if (ret) | ||
3303 | goto out; | ||
3304 | goto loop; | ||
3305 | } | ||
2568 | } else { | 3306 | } else { |
2569 | physical += map->stripe_len; | 3307 | physical += map->stripe_len; |
2570 | logical += increment; | 3308 | logical += increment; |
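The stripe arithmetic used in both raid56 branches above is worth spelling out: get_raid56_logic_offset() converts a device-relative physical offset into the logical start of the containing full stripe, each full stripe spans stripe_len * nr_data_stripes(map) bytes of logical address space (the increment variable), and when the stripe under the current device turns out to be parity, the whole range [stripe_logical, stripe_logical + increment - 1] is handed to scrub_raid56_parity(). A simplified worked example of just that geometry, ignoring the chunk base offset and the parity rotation the real helper performs:

#include <stdio.h>

int main(void)
{
	unsigned long long stripe_len = 64 * 1024;	/* usual btrfs stripe length */
	int nr_data_stripes = 2;			/* e.g. RAID5 over 3 devices */
	unsigned long long increment = stripe_len * nr_data_stripes;

	/* 5th stripe on this device, measured from the start of its dev
	 * extent (map->stripes[num].physical is taken as 0 here) */
	unsigned long long physical = 4 * stripe_len;
	unsigned long long stripe_logical = physical * nr_data_stripes;
	unsigned long long stripe_end = stripe_logical + increment - 1;

	printf("full stripe: logical [%llu, %llu], %llu bytes\n",
	       stripe_logical, stripe_end, increment);
	return 0;
}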
@@ -2605,6 +3343,7 @@ out: | |||
2605 | 3343 | ||
2606 | blk_finish_plug(&plug); | 3344 | blk_finish_plug(&plug); |
2607 | btrfs_free_path(path); | 3345 | btrfs_free_path(path); |
3346 | btrfs_free_path(ppath); | ||
2608 | return ret < 0 ? ret : 0; | 3347 | return ret < 0 ? ret : 0; |
2609 | } | 3348 | } |
2610 | 3349 | ||
@@ -3310,6 +4049,50 @@ out: | |||
3310 | scrub_pending_trans_workers_dec(sctx); | 4049 | scrub_pending_trans_workers_dec(sctx); |
3311 | } | 4050 | } |
3312 | 4051 | ||
4052 | static int check_extent_to_block(struct inode *inode, u64 start, u64 len, | ||
4053 | u64 logical) | ||
4054 | { | ||
4055 | struct extent_state *cached_state = NULL; | ||
4056 | struct btrfs_ordered_extent *ordered; | ||
4057 | struct extent_io_tree *io_tree; | ||
4058 | struct extent_map *em; | ||
4059 | u64 lockstart = start, lockend = start + len - 1; | ||
4060 | int ret = 0; | ||
4061 | |||
4062 | io_tree = &BTRFS_I(inode)->io_tree; | ||
4063 | |||
4064 | lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); | ||
4065 | ordered = btrfs_lookup_ordered_range(inode, lockstart, len); | ||
4066 | if (ordered) { | ||
4067 | btrfs_put_ordered_extent(ordered); | ||
4068 | ret = 1; | ||
4069 | goto out_unlock; | ||
4070 | } | ||
4071 | |||
4072 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
4073 | if (IS_ERR(em)) { | ||
4074 | ret = PTR_ERR(em); | ||
4075 | goto out_unlock; | ||
4076 | } | ||
4077 | |||
4078 | /* | ||
4079 | * This extent does not actually cover the logical extent anymore, | ||
4080 | * move on to the next inode. | ||
4081 | */ | ||
4082 | if (em->block_start > logical || | ||
4083 | em->block_start + em->block_len < logical + len) { | ||
4084 | free_extent_map(em); | ||
4085 | ret = 1; | ||
4086 | goto out_unlock; | ||
4087 | } | ||
4088 | free_extent_map(em); | ||
4089 | |||
4090 | out_unlock: | ||
4091 | unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, | ||
4092 | GFP_NOFS); | ||
4093 | return ret; | ||
4094 | } | ||
4095 | |||
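check_extent_to_block() above factors out the race check that copy_nocow_pages_for_inode() used to do inline, and the page loop below now repeats it after every page read: it returns 1 when the copy should be skipped (an ordered extent is in flight, or the cached extent map no longer covers the logical range), 0 when it is safe to proceed, and a negative errno on failure. The coverage test itself is plain interval containment; a standalone sketch of just that check:

#include <stdbool.h>
#include <stdio.h>

/* The nocow range [logical, logical + len) must lie entirely inside the
 * mapped range [block_start, block_start + block_len); otherwise the
 * extent changed underneath us and the page is not copied. */
static bool extent_covers_range(unsigned long long block_start,
				unsigned long long block_len,
				unsigned long long logical,
				unsigned long long len)
{
	return block_start <= logical &&
	       block_start + block_len >= logical + len;
}

int main(void)
{
	printf("%d\n", extent_covers_range(0, 1 << 20, 4096, 4096));	/* 1: covered      */
	printf("%d\n", extent_covers_range(8192, 4096, 4096, 8192));	/* 0: moved away   */
	return 0;
}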
3313 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | 4096 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, |
3314 | struct scrub_copy_nocow_ctx *nocow_ctx) | 4097 | struct scrub_copy_nocow_ctx *nocow_ctx) |
3315 | { | 4098 | { |
@@ -3318,13 +4101,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | |||
3318 | struct inode *inode; | 4101 | struct inode *inode; |
3319 | struct page *page; | 4102 | struct page *page; |
3320 | struct btrfs_root *local_root; | 4103 | struct btrfs_root *local_root; |
3321 | struct btrfs_ordered_extent *ordered; | ||
3322 | struct extent_map *em; | ||
3323 | struct extent_state *cached_state = NULL; | ||
3324 | struct extent_io_tree *io_tree; | 4104 | struct extent_io_tree *io_tree; |
3325 | u64 physical_for_dev_replace; | 4105 | u64 physical_for_dev_replace; |
4106 | u64 nocow_ctx_logical; | ||
3326 | u64 len = nocow_ctx->len; | 4107 | u64 len = nocow_ctx->len; |
3327 | u64 lockstart = offset, lockend = offset + len - 1; | ||
3328 | unsigned long index; | 4108 | unsigned long index; |
3329 | int srcu_index; | 4109 | int srcu_index; |
3330 | int ret = 0; | 4110 | int ret = 0; |
@@ -3356,30 +4136,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | |||
3356 | 4136 | ||
3357 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | 4137 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; |
3358 | io_tree = &BTRFS_I(inode)->io_tree; | 4138 | io_tree = &BTRFS_I(inode)->io_tree; |
4139 | nocow_ctx_logical = nocow_ctx->logical; | ||
3359 | 4140 | ||
3360 | lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); | 4141 | ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical); |
3361 | ordered = btrfs_lookup_ordered_range(inode, lockstart, len); | 4142 | if (ret) { |
3362 | if (ordered) { | 4143 | ret = ret > 0 ? 0 : ret; |
3363 | btrfs_put_ordered_extent(ordered); | 4144 | goto out; |
3364 | goto out_unlock; | ||
3365 | } | ||
3366 | |||
3367 | em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0); | ||
3368 | if (IS_ERR(em)) { | ||
3369 | ret = PTR_ERR(em); | ||
3370 | goto out_unlock; | ||
3371 | } | ||
3372 | |||
3373 | /* | ||
3374 | * This extent does not actually cover the logical extent anymore, | ||
3375 | * move on to the next inode. | ||
3376 | */ | ||
3377 | if (em->block_start > nocow_ctx->logical || | ||
3378 | em->block_start + em->block_len < nocow_ctx->logical + len) { | ||
3379 | free_extent_map(em); | ||
3380 | goto out_unlock; | ||
3381 | } | 4145 | } |
3382 | free_extent_map(em); | ||
3383 | 4146 | ||
3384 | while (len >= PAGE_CACHE_SIZE) { | 4147 | while (len >= PAGE_CACHE_SIZE) { |
3385 | index = offset >> PAGE_CACHE_SHIFT; | 4148 | index = offset >> PAGE_CACHE_SHIFT; |
@@ -3396,7 +4159,7 @@ again: | |||
3396 | goto next_page; | 4159 | goto next_page; |
3397 | } else { | 4160 | } else { |
3398 | ClearPageError(page); | 4161 | ClearPageError(page); |
3399 | err = extent_read_full_page_nolock(io_tree, page, | 4162 | err = extent_read_full_page(io_tree, page, |
3400 | btrfs_get_extent, | 4163 | btrfs_get_extent, |
3401 | nocow_ctx->mirror_num); | 4164 | nocow_ctx->mirror_num); |
3402 | if (err) { | 4165 | if (err) { |
@@ -3421,6 +4184,14 @@ again: | |||
3421 | goto next_page; | 4184 | goto next_page; |
3422 | } | 4185 | } |
3423 | } | 4186 | } |
4187 | |||
4188 | ret = check_extent_to_block(inode, offset, len, | ||
4189 | nocow_ctx_logical); | ||
4190 | if (ret) { | ||
4191 | ret = ret > 0 ? 0 : ret; | ||
4192 | goto next_page; | ||
4193 | } | ||
4194 | |||
3424 | err = write_page_nocow(nocow_ctx->sctx, | 4195 | err = write_page_nocow(nocow_ctx->sctx, |
3425 | physical_for_dev_replace, page); | 4196 | physical_for_dev_replace, page); |
3426 | if (err) | 4197 | if (err) |
@@ -3434,12 +4205,10 @@ next_page: | |||
3434 | 4205 | ||
3435 | offset += PAGE_CACHE_SIZE; | 4206 | offset += PAGE_CACHE_SIZE; |
3436 | physical_for_dev_replace += PAGE_CACHE_SIZE; | 4207 | physical_for_dev_replace += PAGE_CACHE_SIZE; |
4208 | nocow_ctx_logical += PAGE_CACHE_SIZE; | ||
3437 | len -= PAGE_CACHE_SIZE; | 4209 | len -= PAGE_CACHE_SIZE; |
3438 | } | 4210 | } |
3439 | ret = COPY_COMPLETE; | 4211 | ret = COPY_COMPLETE; |
3440 | out_unlock: | ||
3441 | unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, | ||
3442 | GFP_NOFS); | ||
3443 | out: | 4212 | out: |
3444 | mutex_unlock(&inode->i_mutex); | 4213 | mutex_unlock(&inode->i_mutex); |
3445 | iput(inode); | 4214 | iput(inode); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 874828dd0a86..804432dbc351 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -5507,6 +5507,51 @@ out: | |||
5507 | return ret; | 5507 | return ret; |
5508 | } | 5508 | } |
5509 | 5509 | ||
5510 | /* | ||
5511 | * If orphan cleanup did remove any orphans from a root, it means the tree | ||
5512 | * was modified and therefore the commit root is not the same as the current | ||
5513 | * root anymore. This is a problem, because send uses the commit root and | ||
5514 | * therefore can see inode items that don't exist in the current root anymore, | ||
5515 | * and for example make calls to btrfs_iget, which will do tree lookups based | ||
5516 | * on the current root and not on the commit root. Those lookups will fail, | ||
5517 | * returning a -ESTALE error, and making send fail with that error. So make | ||
5518 | * sure a send does not see any orphans we have just removed, and that it will | ||
5519 | * see the same inodes regardless of whether a transaction commit happened | ||
5520 | * before it started (meaning that the commit root will be the same as the | ||
5521 | * current root) or not. | ||
5522 | */ | ||
5523 | static int ensure_commit_roots_uptodate(struct send_ctx *sctx) | ||
5524 | { | ||
5525 | int i; | ||
5526 | struct btrfs_trans_handle *trans = NULL; | ||
5527 | |||
5528 | again: | ||
5529 | if (sctx->parent_root && | ||
5530 | sctx->parent_root->node != sctx->parent_root->commit_root) | ||
5531 | goto commit_trans; | ||
5532 | |||
5533 | for (i = 0; i < sctx->clone_roots_cnt; i++) | ||
5534 | if (sctx->clone_roots[i].root->node != | ||
5535 | sctx->clone_roots[i].root->commit_root) | ||
5536 | goto commit_trans; | ||
5537 | |||
5538 | if (trans) | ||
5539 | return btrfs_end_transaction(trans, sctx->send_root); | ||
5540 | |||
5541 | return 0; | ||
5542 | |||
5543 | commit_trans: | ||
5544 | /* Use any root, all fs roots will get their commit roots updated. */ | ||
5545 | if (!trans) { | ||
5546 | trans = btrfs_join_transaction(sctx->send_root); | ||
5547 | if (IS_ERR(trans)) | ||
5548 | return PTR_ERR(trans); | ||
5549 | goto again; | ||
5550 | } | ||
5551 | |||
5552 | return btrfs_commit_transaction(trans, sctx->send_root); | ||
5553 | } | ||
5554 | |||
5510 | static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) | 5555 | static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) |
5511 | { | 5556 | { |
5512 | spin_lock(&root->root_item_lock); | 5557 | spin_lock(&root->root_item_lock); |
@@ -5728,6 +5773,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5728 | NULL); | 5773 | NULL); |
5729 | sort_clone_roots = 1; | 5774 | sort_clone_roots = 1; |
5730 | 5775 | ||
5776 | ret = ensure_commit_roots_uptodate(sctx); | ||
5777 | if (ret) | ||
5778 | goto out; | ||
5779 | |||
5731 | current->journal_info = BTRFS_SEND_TRANS_STUB; | 5780 | current->journal_info = BTRFS_SEND_TRANS_STUB; |
5732 | ret = send_subvol(sctx); | 5781 | ret = send_subvol(sctx); |
5733 | current->journal_info = NULL; | 5782 | current->journal_info = NULL; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 54bd91ece35b..60f7cbe815e9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -262,7 +262,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
262 | trans->aborted = errno; | 262 | trans->aborted = errno; |
263 | /* Nothing used. The other threads that have joined this | 263 | /* Nothing used. The other threads that have joined this |
264 | * transaction may be able to continue. */ | 264 | * transaction may be able to continue. */ |
265 | if (!trans->blocks_used) { | 265 | if (!trans->blocks_used && list_empty(&trans->new_bgs)) { |
266 | const char *errstr; | 266 | const char *errstr; |
267 | 267 | ||
268 | errstr = btrfs_decode_error(errno); | 268 | errstr = btrfs_decode_error(errno); |
@@ -642,11 +642,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
642 | "disabling disk space caching"); | 642 | "disabling disk space caching"); |
643 | break; | 643 | break; |
644 | case Opt_inode_cache: | 644 | case Opt_inode_cache: |
645 | btrfs_set_and_info(root, CHANGE_INODE_CACHE, | 645 | btrfs_set_pending_and_info(info, INODE_MAP_CACHE, |
646 | "enabling inode map caching"); | 646 | "enabling inode map caching"); |
647 | break; | 647 | break; |
648 | case Opt_noinode_cache: | 648 | case Opt_noinode_cache: |
649 | btrfs_clear_and_info(root, CHANGE_INODE_CACHE, | 649 | btrfs_clear_pending_and_info(info, INODE_MAP_CACHE, |
650 | "disabling inode map caching"); | 650 | "disabling inode map caching"); |
651 | break; | 651 | break; |
652 | case Opt_clear_cache: | 652 | case Opt_clear_cache: |
@@ -993,9 +993,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
993 | trans = btrfs_attach_transaction_barrier(root); | 993 | trans = btrfs_attach_transaction_barrier(root); |
994 | if (IS_ERR(trans)) { | 994 | if (IS_ERR(trans)) { |
995 | /* no transaction, don't bother */ | 995 | /* no transaction, don't bother */ |
996 | if (PTR_ERR(trans) == -ENOENT) | 996 | if (PTR_ERR(trans) == -ENOENT) { |
997 | return 0; | 997 | /* |
998 | return PTR_ERR(trans); | 998 | * Exit unless we have some pending changes |
999 | * that need to go through commit | ||
1000 | */ | ||
1001 | if (fs_info->pending_changes == 0) | ||
1002 | return 0; | ||
1003 | trans = btrfs_start_transaction(root, 0); | ||
1004 | } else { | ||
1005 | return PTR_ERR(trans); | ||
1006 | } | ||
999 | } | 1007 | } |
1000 | return btrfs_commit_transaction(trans, root); | 1008 | return btrfs_commit_transaction(trans, root); |
1001 | } | 1009 | } |
@@ -1644,8 +1652,20 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1644 | int i = 0, nr_devices; | 1652 | int i = 0, nr_devices; |
1645 | int ret; | 1653 | int ret; |
1646 | 1654 | ||
1655 | /* | ||
1656 | * We aren't under the device list lock, so this is racey-ish, but good | ||
1657 | * enough for our purposes. | ||
1658 | */ | ||
1647 | nr_devices = fs_info->fs_devices->open_devices; | 1659 | nr_devices = fs_info->fs_devices->open_devices; |
1648 | BUG_ON(!nr_devices); | 1660 | if (!nr_devices) { |
1661 | smp_mb(); | ||
1662 | nr_devices = fs_info->fs_devices->open_devices; | ||
1663 | ASSERT(nr_devices); | ||
1664 | if (!nr_devices) { | ||
1665 | *free_bytes = 0; | ||
1666 | return 0; | ||
1667 | } | ||
1668 | } | ||
1649 | 1669 | ||
1650 | devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), | 1670 | devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), |
1651 | GFP_NOFS); | 1671 | GFP_NOFS); |
@@ -1670,11 +1690,17 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1670 | else | 1690 | else |
1671 | min_stripe_size = BTRFS_STRIPE_LEN; | 1691 | min_stripe_size = BTRFS_STRIPE_LEN; |
1672 | 1692 | ||
1673 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 1693 | if (fs_info->alloc_start) |
1694 | mutex_lock(&fs_devices->device_list_mutex); | ||
1695 | rcu_read_lock(); | ||
1696 | list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { | ||
1674 | if (!device->in_fs_metadata || !device->bdev || | 1697 | if (!device->in_fs_metadata || !device->bdev || |
1675 | device->is_tgtdev_for_dev_replace) | 1698 | device->is_tgtdev_for_dev_replace) |
1676 | continue; | 1699 | continue; |
1677 | 1700 | ||
1701 | if (i >= nr_devices) | ||
1702 | break; | ||
1703 | |||
1678 | avail_space = device->total_bytes - device->bytes_used; | 1704 | avail_space = device->total_bytes - device->bytes_used; |
1679 | 1705 | ||
1680 | /* align with stripe_len */ | 1706 | /* align with stripe_len */ |
@@ -1689,24 +1715,32 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1689 | skip_space = 1024 * 1024; | 1715 | skip_space = 1024 * 1024; |
1690 | 1716 | ||
1691 | /* user can set the offset in fs_info->alloc_start. */ | 1717 | /* user can set the offset in fs_info->alloc_start. */ |
1692 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | 1718 | if (fs_info->alloc_start && |
1693 | device->total_bytes) | 1719 | fs_info->alloc_start + BTRFS_STRIPE_LEN <= |
1720 | device->total_bytes) { | ||
1721 | rcu_read_unlock(); | ||
1694 | skip_space = max(fs_info->alloc_start, skip_space); | 1722 | skip_space = max(fs_info->alloc_start, skip_space); |
1695 | 1723 | ||
1696 | /* | 1724 | /* |
1697 | * btrfs can not use the free space in [0, skip_space - 1], | 1725 | * btrfs can not use the free space in |
1698 | * we must subtract it from the total. In order to implement | 1726 | * [0, skip_space - 1], we must subtract it from the |
1699 | * it, we account the used space in this range first. | 1727 | * total. In order to implement it, we account the used |
1700 | */ | 1728 | * space in this range first. |
1701 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | 1729 | */ |
1702 | &used_space); | 1730 | ret = btrfs_account_dev_extents_size(device, 0, |
1703 | if (ret) { | 1731 | skip_space - 1, |
1704 | kfree(devices_info); | 1732 | &used_space); |
1705 | return ret; | 1733 | if (ret) { |
1706 | } | 1734 | kfree(devices_info); |
1735 | mutex_unlock(&fs_devices->device_list_mutex); | ||
1736 | return ret; | ||
1737 | } | ||
1707 | 1738 | ||
1708 | /* calc the free space in [0, skip_space - 1] */ | 1739 | rcu_read_lock(); |
1709 | skip_space -= used_space; | 1740 | |
1741 | /* calc the free space in [0, skip_space - 1] */ | ||
1742 | skip_space -= used_space; | ||
1743 | } | ||
1710 | 1744 | ||
1711 | /* | 1745 | /* |
1712 | * we can use the free space in [0, skip_space - 1], subtract | 1746 | * we can use the free space in [0, skip_space - 1], subtract |
@@ -1725,6 +1759,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1725 | 1759 | ||
1726 | i++; | 1760 | i++; |
1727 | } | 1761 | } |
1762 | rcu_read_unlock(); | ||
1763 | if (fs_info->alloc_start) | ||
1764 | mutex_unlock(&fs_devices->device_list_mutex); | ||
1728 | 1765 | ||
1729 | nr_devices = i; | 1766 | nr_devices = i; |
1730 | 1767 | ||
@@ -1787,8 +1824,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1787 | * holding chunk_mutex to avoid allocating new chunks, holding | 1824 | * holding chunk_mutex to avoid allocating new chunks, holding |
1788 | * device_list_mutex to avoid the device being removed | 1825 | * device_list_mutex to avoid the device being removed |
1789 | */ | 1826 | */ |
1790 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
1791 | mutex_lock(&fs_info->chunk_mutex); | ||
1792 | rcu_read_lock(); | 1827 | rcu_read_lock(); |
1793 | list_for_each_entry_rcu(found, head, list) { | 1828 | list_for_each_entry_rcu(found, head, list) { |
1794 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { | 1829 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
@@ -1824,17 +1859,12 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1824 | buf->f_bfree -= block_rsv->size >> bits; | 1859 | buf->f_bfree -= block_rsv->size >> bits; |
1825 | spin_unlock(&block_rsv->lock); | 1860 | spin_unlock(&block_rsv->lock); |
1826 | 1861 | ||
1827 | buf->f_bavail = total_free_data; | 1862 | buf->f_bavail = div_u64(total_free_data, factor); |
1828 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); | 1863 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); |
1829 | if (ret) { | 1864 | if (ret) |
1830 | mutex_unlock(&fs_info->chunk_mutex); | ||
1831 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1832 | return ret; | 1865 | return ret; |
1833 | } | ||
1834 | buf->f_bavail += div_u64(total_free_data, factor); | 1866 | buf->f_bavail += div_u64(total_free_data, factor); |
1835 | buf->f_bavail = buf->f_bavail >> bits; | 1867 | buf->f_bavail = buf->f_bavail >> bits; |
1836 | mutex_unlock(&fs_info->chunk_mutex); | ||
1837 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1838 | 1868 | ||
1839 | buf->f_type = BTRFS_SUPER_MAGIC; | 1869 | buf->f_type = BTRFS_SUPER_MAGIC; |
1840 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1870 | buf->f_bsize = dentry->d_sb->s_blocksize; |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index b2e7bb4393f6..92db3f648df4 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -111,7 +111,6 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, | |||
111 | { | 111 | { |
112 | struct btrfs_fs_info *fs_info; | 112 | struct btrfs_fs_info *fs_info; |
113 | struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a); | 113 | struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a); |
114 | struct btrfs_trans_handle *trans; | ||
115 | u64 features, set, clear; | 114 | u64 features, set, clear; |
116 | unsigned long val; | 115 | unsigned long val; |
117 | int ret; | 116 | int ret; |
@@ -153,10 +152,6 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, | |||
153 | btrfs_info(fs_info, "%s %s feature flag", | 152 | btrfs_info(fs_info, "%s %s feature flag", |
154 | val ? "Setting" : "Clearing", fa->kobj_attr.attr.name); | 153 | val ? "Setting" : "Clearing", fa->kobj_attr.attr.name); |
155 | 154 | ||
156 | trans = btrfs_start_transaction(fs_info->fs_root, 0); | ||
157 | if (IS_ERR(trans)) | ||
158 | return PTR_ERR(trans); | ||
159 | |||
160 | spin_lock(&fs_info->super_lock); | 155 | spin_lock(&fs_info->super_lock); |
161 | features = get_features(fs_info, fa->feature_set); | 156 | features = get_features(fs_info, fa->feature_set); |
162 | if (val) | 157 | if (val) |
@@ -166,9 +161,11 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, | |||
166 | set_features(fs_info, fa->feature_set, features); | 161 | set_features(fs_info, fa->feature_set, features); |
167 | spin_unlock(&fs_info->super_lock); | 162 | spin_unlock(&fs_info->super_lock); |
168 | 163 | ||
169 | ret = btrfs_commit_transaction(trans, fs_info->fs_root); | 164 | /* |
170 | if (ret) | 165 | * We don't want to do full transaction commit from inside sysfs |
171 | return ret; | 166 | */ |
167 | btrfs_set_pending(fs_info, COMMIT); | ||
168 | wake_up_process(fs_info->transaction_kthread); | ||
172 | 169 | ||
173 | return count; | 170 | return count; |
174 | } | 171 | } |
@@ -372,9 +369,6 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
372 | const char *buf, size_t len) | 369 | const char *buf, size_t len) |
373 | { | 370 | { |
374 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | 371 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); |
375 | struct btrfs_trans_handle *trans; | ||
376 | struct btrfs_root *root = fs_info->fs_root; | ||
377 | int ret; | ||
378 | size_t p_len; | 372 | size_t p_len; |
379 | 373 | ||
380 | if (fs_info->sb->s_flags & MS_RDONLY) | 374 | if (fs_info->sb->s_flags & MS_RDONLY) |
@@ -389,20 +383,18 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
389 | if (p_len >= BTRFS_LABEL_SIZE) | 383 | if (p_len >= BTRFS_LABEL_SIZE) |
390 | return -EINVAL; | 384 | return -EINVAL; |
391 | 385 | ||
392 | trans = btrfs_start_transaction(root, 0); | 386 | spin_lock(&fs_info->super_lock); |
393 | if (IS_ERR(trans)) | ||
394 | return PTR_ERR(trans); | ||
395 | |||
396 | spin_lock(&root->fs_info->super_lock); | ||
397 | memset(fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE); | 387 | memset(fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE); |
398 | memcpy(fs_info->super_copy->label, buf, p_len); | 388 | memcpy(fs_info->super_copy->label, buf, p_len); |
399 | spin_unlock(&root->fs_info->super_lock); | 389 | spin_unlock(&fs_info->super_lock); |
400 | ret = btrfs_commit_transaction(trans, root); | ||
401 | 390 | ||
402 | if (!ret) | 391 | /* |
403 | return len; | 392 | * We don't want to do full transaction commit from inside sysfs |
393 | */ | ||
394 | btrfs_set_pending(fs_info, COMMIT); | ||
395 | wake_up_process(fs_info->transaction_kthread); | ||
404 | 396 | ||
405 | return ret; | 397 | return len; |
406 | } | 398 | } |
407 | BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); | 399 | BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); |
408 | 400 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dcaae3616728..a605d4e2f2bc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -76,6 +76,32 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) | |||
76 | } | 76 | } |
77 | } | 77 | } |
78 | 78 | ||
79 | static void clear_btree_io_tree(struct extent_io_tree *tree) | ||
80 | { | ||
81 | spin_lock(&tree->lock); | ||
82 | while (!RB_EMPTY_ROOT(&tree->state)) { | ||
83 | struct rb_node *node; | ||
84 | struct extent_state *state; | ||
85 | |||
86 | node = rb_first(&tree->state); | ||
87 | state = rb_entry(node, struct extent_state, rb_node); | ||
88 | rb_erase(&state->rb_node, &tree->state); | ||
89 | RB_CLEAR_NODE(&state->rb_node); | ||
90 | /* | ||
91 | * btree io trees aren't supposed to have tasks waiting for | ||
92 | * changes in the flags of extent states ever. | ||
93 | */ | ||
94 | ASSERT(!waitqueue_active(&state->wq)); | ||
95 | free_extent_state(state); | ||
96 | if (need_resched()) { | ||
97 | spin_unlock(&tree->lock); | ||
98 | cond_resched(); | ||
99 | spin_lock(&tree->lock); | ||
100 | } | ||
101 | } | ||
102 | spin_unlock(&tree->lock); | ||
103 | } | ||
104 | |||
79 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, | 105 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, |
80 | struct btrfs_fs_info *fs_info) | 106 | struct btrfs_fs_info *fs_info) |
81 | { | 107 | { |
@@ -89,6 +115,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, | |||
89 | root->commit_root = btrfs_root_node(root); | 115 | root->commit_root = btrfs_root_node(root); |
90 | if (is_fstree(root->objectid)) | 116 | if (is_fstree(root->objectid)) |
91 | btrfs_unpin_free_ino(root); | 117 | btrfs_unpin_free_ino(root); |
118 | clear_btree_io_tree(&root->dirty_log_pages); | ||
92 | } | 119 | } |
93 | up_write(&fs_info->commit_root_sem); | 120 | up_write(&fs_info->commit_root_sem); |
94 | } | 121 | } |
@@ -220,6 +247,7 @@ loop: | |||
220 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 247 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
221 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 248 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
222 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 249 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
250 | INIT_LIST_HEAD(&cur_trans->pending_ordered); | ||
223 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 251 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
224 | extent_io_tree_init(&cur_trans->dirty_pages, | 252 | extent_io_tree_init(&cur_trans->dirty_pages, |
225 | fs_info->btree_inode->i_mapping); | 253 | fs_info->btree_inode->i_mapping); |
@@ -488,6 +516,7 @@ again: | |||
488 | h->sync = false; | 516 | h->sync = false; |
489 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 517 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
490 | INIT_LIST_HEAD(&h->new_bgs); | 518 | INIT_LIST_HEAD(&h->new_bgs); |
519 | INIT_LIST_HEAD(&h->ordered); | ||
491 | 520 | ||
492 | smp_mb(); | 521 | smp_mb(); |
493 | if (cur_trans->state >= TRANS_STATE_BLOCKED && | 522 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
@@ -719,6 +748,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
719 | if (!list_empty(&trans->new_bgs)) | 748 | if (!list_empty(&trans->new_bgs)) |
720 | btrfs_create_pending_block_groups(trans, root); | 749 | btrfs_create_pending_block_groups(trans, root); |
721 | 750 | ||
751 | if (!list_empty(&trans->ordered)) { | ||
752 | spin_lock(&info->trans_lock); | ||
753 | list_splice(&trans->ordered, &cur_trans->pending_ordered); | ||
754 | spin_unlock(&info->trans_lock); | ||
755 | } | ||
756 | |||
722 | trans->delayed_ref_updates = 0; | 757 | trans->delayed_ref_updates = 0; |
723 | if (!trans->sync) { | 758 | if (!trans->sync) { |
724 | must_run_delayed_refs = | 759 | must_run_delayed_refs = |
@@ -828,17 +863,39 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
828 | 863 | ||
829 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 864 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
830 | mark, &cached_state)) { | 865 | mark, &cached_state)) { |
831 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 866 | bool wait_writeback = false; |
832 | mark, &cached_state, GFP_NOFS); | 867 | |
833 | cached_state = NULL; | 868 | err = convert_extent_bit(dirty_pages, start, end, |
834 | err = filemap_fdatawrite_range(mapping, start, end); | 869 | EXTENT_NEED_WAIT, |
870 | mark, &cached_state, GFP_NOFS); | ||
871 | /* | ||
872 | * convert_extent_bit can return -ENOMEM, which is most of the | ||
873 | * time a temporary error. So when it happens, ignore the error | ||
874 | * and wait for writeback of this range to finish - because we | ||
875 | * failed to set the bit EXTENT_NEED_WAIT for the range, a call | ||
876 | * to btrfs_wait_marked_extents() would not know that writeback | ||
877 | * for this range started and therefore wouldn't wait for it to | ||
878 | * finish - we don't want to commit a superblock that points to | ||
879 | * btree nodes/leafs for which writeback hasn't finished yet | ||
880 | * (and without errors). | ||
881 | * We cleanup any entries left in the io tree when committing | ||
882 | * the transaction (through clear_btree_io_tree()). | ||
883 | */ | ||
884 | if (err == -ENOMEM) { | ||
885 | err = 0; | ||
886 | wait_writeback = true; | ||
887 | } | ||
888 | if (!err) | ||
889 | err = filemap_fdatawrite_range(mapping, start, end); | ||
835 | if (err) | 890 | if (err) |
836 | werr = err; | 891 | werr = err; |
892 | else if (wait_writeback) | ||
893 | werr = filemap_fdatawait_range(mapping, start, end); | ||
894 | free_extent_state(cached_state); | ||
895 | cached_state = NULL; | ||
837 | cond_resched(); | 896 | cond_resched(); |
838 | start = end + 1; | 897 | start = end + 1; |
839 | } | 898 | } |
840 | if (err) | ||
841 | werr = err; | ||
842 | return werr; | 899 | return werr; |
843 | } | 900 | } |
844 | 901 | ||
@@ -862,11 +919,25 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
862 | 919 | ||
863 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 920 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
864 | EXTENT_NEED_WAIT, &cached_state)) { | 921 | EXTENT_NEED_WAIT, &cached_state)) { |
865 | clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 922 | /* |
866 | 0, 0, &cached_state, GFP_NOFS); | 923 | * Ignore -ENOMEM errors returned by clear_extent_bit(). |
867 | err = filemap_fdatawait_range(mapping, start, end); | 924 | * When committing the transaction, we'll remove any entries |
925 | * left in the io tree. For a log commit, we don't remove them | ||
926 | * after committing the log because the tree can be accessed | ||
927 | * concurrently - we do it only at transaction commit time when | ||
928 | * it's safe to do it (through clear_btree_io_tree()). | ||
929 | */ | ||
930 | err = clear_extent_bit(dirty_pages, start, end, | ||
931 | EXTENT_NEED_WAIT, | ||
932 | 0, 0, &cached_state, GFP_NOFS); | ||
933 | if (err == -ENOMEM) | ||
934 | err = 0; | ||
935 | if (!err) | ||
936 | err = filemap_fdatawait_range(mapping, start, end); | ||
868 | if (err) | 937 | if (err) |
869 | werr = err; | 938 | werr = err; |
939 | free_extent_state(cached_state); | ||
940 | cached_state = NULL; | ||
870 | cond_resched(); | 941 | cond_resched(); |
871 | start = end + 1; | 942 | start = end + 1; |
872 | } | 943 | } |
@@ -919,17 +990,17 @@ static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
919 | return 0; | 990 | return 0; |
920 | } | 991 | } |
921 | 992 | ||
922 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 993 | static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
923 | struct btrfs_root *root) | 994 | struct btrfs_root *root) |
924 | { | 995 | { |
925 | if (!trans || !trans->transaction) { | 996 | int ret; |
926 | struct inode *btree_inode; | 997 | |
927 | btree_inode = root->fs_info->btree_inode; | 998 | ret = btrfs_write_and_wait_marked_extents(root, |
928 | return filemap_write_and_wait(btree_inode->i_mapping); | ||
929 | } | ||
930 | return btrfs_write_and_wait_marked_extents(root, | ||
931 | &trans->transaction->dirty_pages, | 999 | &trans->transaction->dirty_pages, |
932 | EXTENT_DIRTY); | 1000 | EXTENT_DIRTY); |
1001 | clear_btree_io_tree(&trans->transaction->dirty_pages); | ||
1002 | |||
1003 | return ret; | ||
933 | } | 1004 | } |
934 | 1005 | ||
935 | /* | 1006 | /* |
@@ -1652,6 +1723,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1652 | btrfs_wait_ordered_roots(fs_info, -1); | 1723 | btrfs_wait_ordered_roots(fs_info, -1); |
1653 | } | 1724 | } |
1654 | 1725 | ||
1726 | static inline void | ||
1727 | btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, | ||
1728 | struct btrfs_fs_info *fs_info) | ||
1729 | { | ||
1730 | struct btrfs_ordered_extent *ordered; | ||
1731 | |||
1732 | spin_lock(&fs_info->trans_lock); | ||
1733 | while (!list_empty(&cur_trans->pending_ordered)) { | ||
1734 | ordered = list_first_entry(&cur_trans->pending_ordered, | ||
1735 | struct btrfs_ordered_extent, | ||
1736 | trans_list); | ||
1737 | list_del_init(&ordered->trans_list); | ||
1738 | spin_unlock(&fs_info->trans_lock); | ||
1739 | |||
1740 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, | ||
1741 | &ordered->flags)); | ||
1742 | btrfs_put_ordered_extent(ordered); | ||
1743 | spin_lock(&fs_info->trans_lock); | ||
1744 | } | ||
1745 | spin_unlock(&fs_info->trans_lock); | ||
1746 | } | ||
1747 | |||
1655 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1748 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
1656 | struct btrfs_root *root) | 1749 | struct btrfs_root *root) |
1657 | { | 1750 | { |
@@ -1702,6 +1795,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1702 | } | 1795 | } |
1703 | 1796 | ||
1704 | spin_lock(&root->fs_info->trans_lock); | 1797 | spin_lock(&root->fs_info->trans_lock); |
1798 | list_splice(&trans->ordered, &cur_trans->pending_ordered); | ||
1705 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { | 1799 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
1706 | spin_unlock(&root->fs_info->trans_lock); | 1800 | spin_unlock(&root->fs_info->trans_lock); |
1707 | atomic_inc(&cur_trans->use_count); | 1801 | atomic_inc(&cur_trans->use_count); |
@@ -1754,6 +1848,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1754 | 1848 | ||
1755 | btrfs_wait_delalloc_flush(root->fs_info); | 1849 | btrfs_wait_delalloc_flush(root->fs_info); |
1756 | 1850 | ||
1851 | btrfs_wait_pending_ordered(cur_trans, root->fs_info); | ||
1852 | |||
1757 | btrfs_scrub_pause(root); | 1853 | btrfs_scrub_pause(root); |
1758 | /* | 1854 | /* |
1759 | * Ok now we need to make sure to block out any other joins while we | 1855 | * Ok now we need to make sure to block out any other joins while we |
@@ -1842,13 +1938,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1842 | } | 1938 | } |
1843 | 1939 | ||
1844 | /* | 1940 | /* |
1845 | * Since the transaction is done, we should set the inode map cache flag | 1941 | * Since the transaction is done, we can apply the pending changes |
1846 | * before any other comming transaction. | 1942 | * before the next transaction. |
1847 | */ | 1943 | */ |
1848 | if (btrfs_test_opt(root, CHANGE_INODE_CACHE)) | 1944 | btrfs_apply_pending_changes(root->fs_info); |
1849 | btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); | ||
1850 | else | ||
1851 | btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); | ||
1852 | 1945 | ||
1853 | /* commit_fs_roots gets rid of all the tree log roots, it is now | 1946 | /* commit_fs_roots gets rid of all the tree log roots, it is now |
1854 | * safe to free the root of tree log roots | 1947 | * safe to free the root of tree log roots |
@@ -2019,3 +2112,32 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
2019 | 2112 | ||
2020 | return (ret < 0) ? 0 : 1; | 2113 | return (ret < 0) ? 0 : 1; |
2021 | } | 2114 | } |
2115 | |||
2116 | void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) | ||
2117 | { | ||
2118 | unsigned long prev; | ||
2119 | unsigned long bit; | ||
2120 | |||
2121 | prev = cmpxchg(&fs_info->pending_changes, 0, 0); | ||
2122 | if (!prev) | ||
2123 | return; | ||
2124 | |||
2125 | bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE; | ||
2126 | if (prev & bit) | ||
2127 | btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE); | ||
2128 | prev &= ~bit; | ||
2129 | |||
2130 | bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE; | ||
2131 | if (prev & bit) | ||
2132 | btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE); | ||
2133 | prev &= ~bit; | ||
2134 | |||
2135 | bit = 1 << BTRFS_PENDING_COMMIT; | ||
2136 | if (prev & bit) | ||
2137 | btrfs_debug(fs_info, "pending commit done"); | ||
2138 | prev &= ~bit; | ||
2139 | |||
2140 | if (prev) | ||
2141 | btrfs_warn(fs_info, | ||
2142 | "unknown pending changes left 0x%lx, ignoring", prev); | ||
2143 | } | ||
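btrfs_apply_pending_changes() above is the consumer side of the new fs_info->pending_changes mask that sysfs, the mount-option parser and sync_fs now feed through the btrfs_set_pending()/btrfs_set_pending_and_info() helpers (their definitions are not part of this hunk): each BTRFS_PENDING_* flag is a bit index, and at commit time every known bit is tested, acted on and stripped, with anything left over only warned about. A minimal userspace sketch of that decode loop; the enum values are illustrative stand-ins for the real BTRFS_PENDING_* constants, and the atomic read of the mask is not reproduced here:

#include <stdio.h>

enum {
	PENDING_SET_INODE_MAP_CACHE,	/* stand-ins for BTRFS_PENDING_* */
	PENDING_CLEAR_INODE_MAP_CACHE,
	PENDING_COMMIT,
};

/* Decode a snapshot of the pending-changes mask the way
 * btrfs_apply_pending_changes() does: test each known bit, strip it,
 * then warn about whatever is left. */
static void apply_pending(unsigned long prev)
{
	unsigned long bit;

	bit = 1UL << PENDING_SET_INODE_MAP_CACHE;
	if (prev & bit)
		printf("enable inode map cache\n");
	prev &= ~bit;

	bit = 1UL << PENDING_CLEAR_INODE_MAP_CACHE;
	if (prev & bit)
		printf("disable inode map cache\n");
	prev &= ~bit;

	bit = 1UL << PENDING_COMMIT;
	if (prev & bit)
		printf("pending commit done\n");
	prev &= ~bit;

	if (prev)
		printf("unknown pending changes left 0x%lx, ignoring\n", prev);
}

int main(void)
{
	apply_pending((1UL << PENDING_COMMIT) | (1UL << PENDING_SET_INODE_MAP_CACHE));
	return 0;
}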
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d8f40e1a5d2d..00ed29c4b3f9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -56,6 +56,7 @@ struct btrfs_transaction { | |||
56 | wait_queue_head_t commit_wait; | 56 | wait_queue_head_t commit_wait; |
57 | struct list_head pending_snapshots; | 57 | struct list_head pending_snapshots; |
58 | struct list_head pending_chunks; | 58 | struct list_head pending_chunks; |
59 | struct list_head pending_ordered; | ||
59 | struct list_head switch_commits; | 60 | struct list_head switch_commits; |
60 | struct btrfs_delayed_ref_root delayed_refs; | 61 | struct btrfs_delayed_ref_root delayed_refs; |
61 | int aborted; | 62 | int aborted; |
@@ -105,6 +106,7 @@ struct btrfs_trans_handle { | |||
105 | */ | 106 | */ |
106 | struct btrfs_root *root; | 107 | struct btrfs_root *root; |
107 | struct seq_list delayed_ref_elem; | 108 | struct seq_list delayed_ref_elem; |
109 | struct list_head ordered; | ||
108 | struct list_head qgroup_ref_list; | 110 | struct list_head qgroup_ref_list; |
109 | struct list_head new_bgs; | 111 | struct list_head new_bgs; |
110 | }; | 112 | }; |
@@ -145,8 +147,6 @@ struct btrfs_trans_handle *btrfs_attach_transaction_barrier( | |||
145 | struct btrfs_root *root); | 147 | struct btrfs_root *root); |
146 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); | 148 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
147 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 149 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
148 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | ||
149 | struct btrfs_root *root); | ||
150 | 150 | ||
151 | void btrfs_add_dead_root(struct btrfs_root *root); | 151 | void btrfs_add_dead_root(struct btrfs_root *root); |
152 | int btrfs_defrag_root(struct btrfs_root *root); | 152 | int btrfs_defrag_root(struct btrfs_root *root); |
@@ -170,4 +170,6 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
170 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | 170 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); |
171 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 171 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
172 | void btrfs_put_transaction(struct btrfs_transaction *transaction); | 172 | void btrfs_put_transaction(struct btrfs_transaction *transaction); |
173 | void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); | ||
174 | |||
173 | #endif | 175 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 286213cec861..9a02da16f2be 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -2599,12 +2599,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2599 | index2 = root_log_ctx.log_transid % 2; | 2599 | index2 = root_log_ctx.log_transid % 2; |
2600 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2600 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2601 | blk_finish_plug(&plug); | 2601 | blk_finish_plug(&plug); |
2602 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2602 | ret = btrfs_wait_marked_extents(log, &log->dirty_log_pages, |
2603 | mark); | ||
2604 | btrfs_wait_logged_extents(trans, log, log_transid); | ||
2603 | wait_log_commit(trans, log_root_tree, | 2605 | wait_log_commit(trans, log_root_tree, |
2604 | root_log_ctx.log_transid); | 2606 | root_log_ctx.log_transid); |
2605 | btrfs_free_logged_extents(log, log_transid); | ||
2606 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
2607 | ret = root_log_ctx.log_ret; | 2608 | if (!ret) |
2609 | ret = root_log_ctx.log_ret; | ||
2608 | goto out; | 2610 | goto out; |
2609 | } | 2611 | } |
2610 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | 2612 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); |
@@ -2641,11 +2643,18 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2641 | mutex_unlock(&log_root_tree->log_mutex); | 2643 | mutex_unlock(&log_root_tree->log_mutex); |
2642 | goto out_wake_log_root; | 2644 | goto out_wake_log_root; |
2643 | } | 2645 | } |
2644 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2646 | ret = btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2645 | btrfs_wait_marked_extents(log_root_tree, | 2647 | if (!ret) |
2646 | &log_root_tree->dirty_log_pages, | 2648 | ret = btrfs_wait_marked_extents(log_root_tree, |
2647 | EXTENT_NEW | EXTENT_DIRTY); | 2649 | &log_root_tree->dirty_log_pages, |
2648 | btrfs_wait_logged_extents(log, log_transid); | 2650 | EXTENT_NEW | EXTENT_DIRTY); |
2651 | if (ret) { | ||
2652 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
2653 | btrfs_free_logged_extents(log, log_transid); | ||
2654 | mutex_unlock(&log_root_tree->log_mutex); | ||
2655 | goto out_wake_log_root; | ||
2656 | } | ||
2657 | btrfs_wait_logged_extents(trans, log, log_transid); | ||
2649 | 2658 | ||
2650 | btrfs_set_super_log_root(root->fs_info->super_for_commit, | 2659 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
2651 | log_root_tree->node->start); | 2660 | log_root_tree->node->start); |
@@ -3626,6 +3635,12 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, | |||
3626 | test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); | 3635 | test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); |
3627 | 3636 | ||
3628 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { | 3637 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { |
3638 | /* | ||
3639 | * Clear the AS_EIO/AS_ENOSPC flags from the inode's | ||
3640 | * i_mapping flags, so that the next fsync won't get | ||
3641 | * an outdated io error too. | ||
3642 | */ | ||
3643 | btrfs_inode_check_errors(inode); | ||
3629 | *ordered_io_error = true; | 3644 | *ordered_io_error = true; |
3630 | break; | 3645 | break; |
3631 | } | 3646 | } |
@@ -3766,7 +3781,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3766 | fi = btrfs_item_ptr(leaf, path->slots[0], | 3781 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3767 | struct btrfs_file_extent_item); | 3782 | struct btrfs_file_extent_item); |
3768 | 3783 | ||
3769 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | 3784 | btrfs_set_token_file_extent_generation(leaf, fi, trans->transid, |
3770 | &token); | 3785 | &token); |
3771 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | 3786 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
3772 | btrfs_set_token_file_extent_type(leaf, fi, | 3787 | btrfs_set_token_file_extent_type(leaf, fi, |
@@ -3963,7 +3978,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3963 | 3978 | ||
3964 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3979 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
3965 | 3980 | ||
3966 | btrfs_get_logged_extents(inode, &logged_list); | 3981 | btrfs_get_logged_extents(inode, &logged_list, start, end); |
3967 | 3982 | ||
3968 | /* | 3983 | /* |
3969 | * a brute force approach to making sure we get the most uptodate | 3984 | * a brute force approach to making sure we get the most uptodate |
@@ -4089,6 +4104,21 @@ log_extents: | |||
4089 | btrfs_release_path(path); | 4104 | btrfs_release_path(path); |
4090 | btrfs_release_path(dst_path); | 4105 | btrfs_release_path(dst_path); |
4091 | if (fast_search) { | 4106 | if (fast_search) { |
4107 | /* | ||
4108 | * Some ordered extents started by fsync might have completed | ||
4109 | * before we collected the ordered extents in logged_list, which | ||
4110 | * means they're gone, not in our logged_list nor in the inode's | ||
4111 | * ordered tree. We want the application/user space to know an | ||
4112 | * error happened while attempting to persist file data so that | ||
4113 | * it can take proper action. If such an error happened, we leave | ||
4114 | * without writing to the log tree and the fsync must report the | ||
4115 | * file data write error and not commit the current transaction. | ||
4116 | */ | ||
4117 | err = btrfs_inode_check_errors(inode); | ||
4118 | if (err) { | ||
4119 | ctx->io_err = err; | ||
4120 | goto out_unlock; | ||
4121 | } | ||
4092 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, | 4122 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4093 | &logged_list, ctx); | 4123 | &logged_list, ctx); |
4094 | if (ret) { | 4124 | if (ret) { |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d47289c715c8..0144790e296e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -53,16 +53,6 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | |||
53 | DEFINE_MUTEX(uuid_mutex); | 53 | DEFINE_MUTEX(uuid_mutex); |
54 | static LIST_HEAD(fs_uuids); | 54 | static LIST_HEAD(fs_uuids); |
55 | 55 | ||
56 | static void lock_chunks(struct btrfs_root *root) | ||
57 | { | ||
58 | mutex_lock(&root->fs_info->chunk_mutex); | ||
59 | } | ||
60 | |||
61 | static void unlock_chunks(struct btrfs_root *root) | ||
62 | { | ||
63 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
64 | } | ||
65 | |||
66 | static struct btrfs_fs_devices *__alloc_fs_devices(void) | 56 | static struct btrfs_fs_devices *__alloc_fs_devices(void) |
67 | { | 57 | { |
68 | struct btrfs_fs_devices *fs_devs; | 58 | struct btrfs_fs_devices *fs_devs; |
@@ -1068,9 +1058,11 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans, | |||
1068 | u64 *start, u64 len) | 1058 | u64 *start, u64 len) |
1069 | { | 1059 | { |
1070 | struct extent_map *em; | 1060 | struct extent_map *em; |
1061 | struct list_head *search_list = &trans->transaction->pending_chunks; | ||
1071 | int ret = 0; | 1062 | int ret = 0; |
1072 | 1063 | ||
1073 | list_for_each_entry(em, &trans->transaction->pending_chunks, list) { | 1064 | again: |
1065 | list_for_each_entry(em, search_list, list) { | ||
1074 | struct map_lookup *map; | 1066 | struct map_lookup *map; |
1075 | int i; | 1067 | int i; |
1076 | 1068 | ||
@@ -1087,6 +1079,10 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans, | |||
1087 | ret = 1; | 1079 | ret = 1; |
1088 | } | 1080 | } |
1089 | } | 1081 | } |
1082 | if (search_list == &trans->transaction->pending_chunks) { | ||
1083 | search_list = &trans->root->fs_info->pinned_chunks; | ||
1084 | goto again; | ||
1085 | } | ||
1090 | 1086 | ||
1091 | return ret; | 1087 | return ret; |
1092 | } | 1088 | } |
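With the change above, contains_pending_extent() walks the transaction's pending_chunks list and then fs_info->pinned_chunks with a single loop body, switching search_list and jumping back to the label. The sketch below shows the same two-pass idiom over plain arrays; the range and overlap helpers are illustrative, not the btrfs extent-map structures.

#include <stdbool.h>
#include <stdio.h>

struct range { unsigned long long start, len; };

static bool overlaps(const struct range *r, unsigned long long start,
                     unsigned long long len)
{
        return start < r->start + r->len && r->start < start + len;
}

static bool contains_pending(const struct range *pending, int npending,
                             const struct range *pinned, int npinned,
                             unsigned long long start, unsigned long long len)
{
        const struct range *list = pending;
        int n = npending;
        int i;

again:
        for (i = 0; i < n; i++)
                if (overlaps(&list[i], start, len))
                        return true;

        if (list == pending) {          /* second pass over the pinned ranges */
                list = pinned;
                n = npinned;
                goto again;
        }
        return false;
}

int main(void)
{
        struct range pending[] = { { 0, 16 } };
        struct range pinned[]  = { { 64, 16 } };

        printf("%d\n", contains_pending(pending, 1, pinned, 1, 70, 4)); /* 1 */
        printf("%d\n", contains_pending(pending, 1, pinned, 1, 32, 4)); /* 0 */
        return 0;
}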
@@ -1800,8 +1796,8 @@ error_undo: | |||
1800 | goto error_brelse; | 1796 | goto error_brelse; |
1801 | } | 1797 | } |
1802 | 1798 | ||
1803 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | 1799 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, |
1804 | struct btrfs_device *srcdev) | 1800 | struct btrfs_device *srcdev) |
1805 | { | 1801 | { |
1806 | struct btrfs_fs_devices *fs_devices; | 1802 | struct btrfs_fs_devices *fs_devices; |
1807 | 1803 | ||
@@ -1829,6 +1825,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | |||
1829 | 1825 | ||
1830 | if (srcdev->bdev) | 1826 | if (srcdev->bdev) |
1831 | fs_devices->open_devices--; | 1827 | fs_devices->open_devices--; |
1828 | } | ||
1829 | |||
1830 | void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | ||
1831 | struct btrfs_device *srcdev) | ||
1832 | { | ||
1833 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; | ||
1832 | 1834 | ||
1833 | call_rcu(&srcdev->rcu, free_device); | 1835 | call_rcu(&srcdev->rcu, free_device); |
1834 | 1836 | ||
@@ -2647,18 +2649,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | |||
2647 | } | 2649 | } |
2648 | } | 2650 | } |
2649 | 2651 | ||
2650 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 2652 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em); |
2651 | if (ret) { | 2653 | if (ret) { |
2652 | btrfs_abort_transaction(trans, extent_root, ret); | 2654 | btrfs_abort_transaction(trans, extent_root, ret); |
2653 | goto out; | 2655 | goto out; |
2654 | } | 2656 | } |
2655 | 2657 | ||
2656 | write_lock(&em_tree->lock); | ||
2657 | remove_extent_mapping(em_tree, em); | ||
2658 | write_unlock(&em_tree->lock); | ||
2659 | |||
2660 | /* once for the tree */ | ||
2661 | free_extent_map(em); | ||
2662 | out: | 2658 | out: |
2663 | /* once for us */ | 2659 | /* once for us */ |
2664 | free_extent_map(em); | 2660 | free_extent_map(em); |
@@ -4505,6 +4501,8 @@ error_del_extent: | |||
4505 | free_extent_map(em); | 4501 | free_extent_map(em); |
4506 | /* One for the tree reference */ | 4502 | /* One for the tree reference */ |
4507 | free_extent_map(em); | 4503 | free_extent_map(em); |
4504 | /* One for the pending_chunks list reference */ | ||
4505 | free_extent_map(em); | ||
4508 | error: | 4506 | error: |
4509 | kfree(devices_info); | 4507 | kfree(devices_info); |
4510 | return ret; | 4508 | return ret; |
@@ -4881,13 +4879,15 @@ static inline int parity_smaller(u64 a, u64 b) | |||
4881 | static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | 4879 | static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) |
4882 | { | 4880 | { |
4883 | struct btrfs_bio_stripe s; | 4881 | struct btrfs_bio_stripe s; |
4882 | int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; | ||
4884 | int i; | 4883 | int i; |
4885 | u64 l; | 4884 | u64 l; |
4886 | int again = 1; | 4885 | int again = 1; |
4886 | int m; | ||
4887 | 4887 | ||
4888 | while (again) { | 4888 | while (again) { |
4889 | again = 0; | 4889 | again = 0; |
4890 | for (i = 0; i < bbio->num_stripes - 1; i++) { | 4890 | for (i = 0; i < real_stripes - 1; i++) { |
4891 | if (parity_smaller(raid_map[i], raid_map[i+1])) { | 4891 | if (parity_smaller(raid_map[i], raid_map[i+1])) { |
4892 | s = bbio->stripes[i]; | 4892 | s = bbio->stripes[i]; |
4893 | l = raid_map[i]; | 4893 | l = raid_map[i]; |
@@ -4895,6 +4895,14 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) | |||
4895 | raid_map[i] = raid_map[i+1]; | 4895 | raid_map[i] = raid_map[i+1]; |
4896 | bbio->stripes[i+1] = s; | 4896 | bbio->stripes[i+1] = s; |
4897 | raid_map[i+1] = l; | 4897 | raid_map[i+1] = l; |
4898 | |||
4899 | if (bbio->tgtdev_map) { | ||
4900 | m = bbio->tgtdev_map[i]; | ||
4901 | bbio->tgtdev_map[i] = | ||
4902 | bbio->tgtdev_map[i + 1]; | ||
4903 | bbio->tgtdev_map[i + 1] = m; | ||
4904 | } | ||
4905 | |||
4898 | again = 1; | 4906 | again = 1; |
4899 | } | 4907 | } |
4900 | } | 4908 | } |
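sort_parity_stripes() now bubble-sorts only the real stripes by their raid_map key and, when a tgtdev_map is present, swaps its entries together with the stripes so the index array keeps matching the sorted order. A compact stand-alone illustration of keeping a parallel array in lockstep during such a sort (names are invented for the example):

#include <stdio.h>

static void sort_parallel(unsigned long long *key, int *mirror, int n)
{
        int again = 1;

        while (again) {
                again = 0;
                for (int i = 0; i < n - 1; i++) {
                        if (key[i] > key[i + 1]) {
                                unsigned long long k = key[i];
                                int m = mirror[i];

                                key[i] = key[i + 1];
                                key[i + 1] = k;
                                /* keep the side array consistent with the new order */
                                mirror[i] = mirror[i + 1];
                                mirror[i + 1] = m;
                                again = 1;
                        }
                }
        }
}

int main(void)
{
        unsigned long long key[] = { 30, 10, 20 };
        int mirror[] = { 2, 0, 1 };

        sort_parallel(key, mirror, 3);
        for (int i = 0; i < 3; i++)
                printf("%llu -> %d\n", key[i], mirror[i]);
        return 0;
}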
@@ -4923,6 +4931,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4923 | int ret = 0; | 4931 | int ret = 0; |
4924 | int num_stripes; | 4932 | int num_stripes; |
4925 | int max_errors = 0; | 4933 | int max_errors = 0; |
4934 | int tgtdev_indexes = 0; | ||
4926 | struct btrfs_bio *bbio = NULL; | 4935 | struct btrfs_bio *bbio = NULL; |
4927 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 4936 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
4928 | int dev_replace_is_ongoing = 0; | 4937 | int dev_replace_is_ongoing = 0; |
@@ -5161,15 +5170,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5161 | BTRFS_BLOCK_GROUP_RAID6)) { | 5170 | BTRFS_BLOCK_GROUP_RAID6)) { |
5162 | u64 tmp; | 5171 | u64 tmp; |
5163 | 5172 | ||
5164 | if (bbio_ret && ((rw & REQ_WRITE) || mirror_num > 1) | 5173 | if (raid_map_ret && |
5165 | && raid_map_ret) { | 5174 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || |
5175 | mirror_num > 1)) { | ||
5166 | int i, rot; | 5176 | int i, rot; |
5167 | 5177 | ||
5168 | /* push stripe_nr back to the start of the full stripe */ | 5178 | /* push stripe_nr back to the start of the full stripe */ |
5169 | stripe_nr = raid56_full_stripe_start; | 5179 | stripe_nr = raid56_full_stripe_start; |
5170 | do_div(stripe_nr, stripe_len); | 5180 | do_div(stripe_nr, stripe_len * nr_data_stripes(map)); |
5171 | |||
5172 | stripe_index = do_div(stripe_nr, nr_data_stripes(map)); | ||
5173 | 5181 | ||
5174 | /* RAID[56] write or recovery. Return all stripes */ | 5182 | /* RAID[56] write or recovery. Return all stripes */ |
5175 | num_stripes = map->num_stripes; | 5183 | num_stripes = map->num_stripes; |
@@ -5235,14 +5243,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5235 | num_alloc_stripes <<= 1; | 5243 | num_alloc_stripes <<= 1; |
5236 | if (rw & REQ_GET_READ_MIRRORS) | 5244 | if (rw & REQ_GET_READ_MIRRORS) |
5237 | num_alloc_stripes++; | 5245 | num_alloc_stripes++; |
5246 | tgtdev_indexes = num_stripes; | ||
5238 | } | 5247 | } |
5239 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); | 5248 | |
5249 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), | ||
5250 | GFP_NOFS); | ||
5240 | if (!bbio) { | 5251 | if (!bbio) { |
5241 | kfree(raid_map); | 5252 | kfree(raid_map); |
5242 | ret = -ENOMEM; | 5253 | ret = -ENOMEM; |
5243 | goto out; | 5254 | goto out; |
5244 | } | 5255 | } |
5245 | atomic_set(&bbio->error, 0); | 5256 | atomic_set(&bbio->error, 0); |
5257 | if (dev_replace_is_ongoing) | ||
5258 | bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); | ||
5246 | 5259 | ||
5247 | if (rw & REQ_DISCARD) { | 5260 | if (rw & REQ_DISCARD) { |
5248 | int factor = 0; | 5261 | int factor = 0; |
@@ -5327,6 +5340,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5327 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) | 5340 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
5328 | max_errors = btrfs_chunk_max_errors(map); | 5341 | max_errors = btrfs_chunk_max_errors(map); |
5329 | 5342 | ||
5343 | tgtdev_indexes = 0; | ||
5330 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | 5344 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && |
5331 | dev_replace->tgtdev != NULL) { | 5345 | dev_replace->tgtdev != NULL) { |
5332 | int index_where_to_add; | 5346 | int index_where_to_add; |
@@ -5355,8 +5369,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5355 | new->physical = old->physical; | 5369 | new->physical = old->physical; |
5356 | new->length = old->length; | 5370 | new->length = old->length; |
5357 | new->dev = dev_replace->tgtdev; | 5371 | new->dev = dev_replace->tgtdev; |
5372 | bbio->tgtdev_map[i] = index_where_to_add; | ||
5358 | index_where_to_add++; | 5373 | index_where_to_add++; |
5359 | max_errors++; | 5374 | max_errors++; |
5375 | tgtdev_indexes++; | ||
5360 | } | 5376 | } |
5361 | } | 5377 | } |
5362 | num_stripes = index_where_to_add; | 5378 | num_stripes = index_where_to_add; |
@@ -5402,7 +5418,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5402 | tgtdev_stripe->length = | 5418 | tgtdev_stripe->length = |
5403 | bbio->stripes[index_srcdev].length; | 5419 | bbio->stripes[index_srcdev].length; |
5404 | tgtdev_stripe->dev = dev_replace->tgtdev; | 5420 | tgtdev_stripe->dev = dev_replace->tgtdev; |
5421 | bbio->tgtdev_map[index_srcdev] = num_stripes; | ||
5405 | 5422 | ||
5423 | tgtdev_indexes++; | ||
5406 | num_stripes++; | 5424 | num_stripes++; |
5407 | } | 5425 | } |
5408 | } | 5426 | } |
@@ -5412,6 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5412 | bbio->num_stripes = num_stripes; | 5430 | bbio->num_stripes = num_stripes; |
5413 | bbio->max_errors = max_errors; | 5431 | bbio->max_errors = max_errors; |
5414 | bbio->mirror_num = mirror_num; | 5432 | bbio->mirror_num = mirror_num; |
5433 | bbio->num_tgtdevs = tgtdev_indexes; | ||
5415 | 5434 | ||
5416 | /* | 5435 | /* |
5417 | * this is the case that REQ_READ && dev_replace_is_ongoing && | 5436 | * this is the case that REQ_READ && dev_replace_is_ongoing && |
@@ -5443,6 +5462,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5443 | mirror_num, NULL); | 5462 | mirror_num, NULL); |
5444 | } | 5463 | } |
5445 | 5464 | ||
5465 | /* For Scrub/replace */ | ||
5466 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | ||
5467 | u64 logical, u64 *length, | ||
5468 | struct btrfs_bio **bbio_ret, int mirror_num, | ||
5469 | u64 **raid_map_ret) | ||
5470 | { | ||
5471 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, | ||
5472 | mirror_num, raid_map_ret); | ||
5473 | } | ||
5474 | |||
5446 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 5475 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
5447 | u64 chunk_start, u64 physical, u64 devid, | 5476 | u64 chunk_start, u64 physical, u64 devid, |
5448 | u64 **logical, int *naddrs, int *stripe_len) | 5477 | u64 **logical, int *naddrs, int *stripe_len) |
@@ -5812,12 +5841,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5812 | } else { | 5841 | } else { |
5813 | ret = raid56_parity_recover(root, bio, bbio, | 5842 | ret = raid56_parity_recover(root, bio, bbio, |
5814 | raid_map, map_length, | 5843 | raid_map, map_length, |
5815 | mirror_num); | 5844 | mirror_num, 1); |
5816 | } | 5845 | } |
5817 | /* | 5846 | |
5818 | * FIXME, replace dosen't support raid56 yet, please fix | ||
5819 | * it in the future. | ||
5820 | */ | ||
5821 | btrfs_bio_counter_dec(root->fs_info); | 5847 | btrfs_bio_counter_dec(root->fs_info); |
5822 | return ret; | 5848 | return ret; |
5823 | } | 5849 | } |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 08980fa23039..d6fe73c0f4a2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -292,7 +292,7 @@ struct btrfs_bio_stripe { | |||
292 | struct btrfs_bio; | 292 | struct btrfs_bio; |
293 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | 293 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); |
294 | 294 | ||
295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 | 295 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) |
296 | 296 | ||
297 | struct btrfs_bio { | 297 | struct btrfs_bio { |
298 | atomic_t stripes_pending; | 298 | atomic_t stripes_pending; |
@@ -305,6 +305,8 @@ struct btrfs_bio { | |||
305 | int max_errors; | 305 | int max_errors; |
306 | int num_stripes; | 306 | int num_stripes; |
307 | int mirror_num; | 307 | int mirror_num; |
308 | int num_tgtdevs; | ||
309 | int *tgtdev_map; | ||
308 | struct btrfs_bio_stripe stripes[]; | 310 | struct btrfs_bio_stripe stripes[]; |
309 | }; | 311 | }; |
310 | 312 | ||
@@ -387,12 +389,18 @@ struct btrfs_balance_control { | |||
387 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 389 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
388 | u64 end, u64 *length); | 390 | u64 end, u64 *length); |
389 | 391 | ||
390 | #define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ | 392 | #define btrfs_bio_size(total_stripes, real_stripes) \ |
391 | (sizeof(struct btrfs_bio_stripe) * (n))) | 393 | (sizeof(struct btrfs_bio) + \ |
394 | (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \ | ||
395 | (sizeof(int) * (real_stripes))) | ||
392 | 396 | ||
393 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | 397 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
394 | u64 logical, u64 *length, | 398 | u64 logical, u64 *length, |
395 | struct btrfs_bio **bbio_ret, int mirror_num); | 399 | struct btrfs_bio **bbio_ret, int mirror_num); |
400 | int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, | ||
401 | u64 logical, u64 *length, | ||
402 | struct btrfs_bio **bbio_ret, int mirror_num, | ||
403 | u64 **raid_map_ret); | ||
396 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 404 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
397 | u64 chunk_start, u64 physical, u64 devid, | 405 | u64 chunk_start, u64 physical, u64 devid, |
398 | u64 **logical, int *naddrs, int *stripe_len); | 406 | u64 **logical, int *naddrs, int *stripe_len); |
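The reworked btrfs_bio_size() sizes one allocation as the struct, total_stripes stripe entries, and then real_stripes ints, and __btrfs_map_block() points bbio->tgtdev_map just past the stripe array. A userspace sketch of that single-allocation, trailing-array layout follows; bio_stub and stripe_stub are invented names for the example, not kernel types.

#include <stdio.h>
#include <stdlib.h>

struct stripe_stub { unsigned long long physical; };

struct bio_stub {
        int num_stripes;
        int num_tgtdevs;
        int *tgtdev_map;                /* points into the same allocation */
        struct stripe_stub stripes[];   /* flexible array member */
};

static struct bio_stub *alloc_bio(int total_stripes, int real_stripes)
{
        size_t size = sizeof(struct bio_stub) +
                      sizeof(struct stripe_stub) * total_stripes +
                      sizeof(int) * real_stripes;
        struct bio_stub *b = calloc(1, size);

        if (!b)
                return NULL;
        b->num_stripes = total_stripes;
        b->num_tgtdevs = real_stripes;
        /* the int array lives immediately after the stripe array */
        b->tgtdev_map = (int *)(b->stripes + total_stripes);
        return b;
}

int main(void)
{
        struct bio_stub *b = alloc_bio(4, 2);

        if (!b)
                return 1;
        b->tgtdev_map[1] = 3;
        printf("tgtdev_map[1] = %d\n", b->tgtdev_map[1]);
        free(b);
        return 0;
}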
@@ -448,8 +456,10 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); | |||
448 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 456 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
449 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 457 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
450 | struct btrfs_fs_info *fs_info); | 458 | struct btrfs_fs_info *fs_info); |
451 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | 459 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, |
452 | struct btrfs_device *srcdev); | 460 | struct btrfs_device *srcdev); |
461 | void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | ||
462 | struct btrfs_device *srcdev); | ||
453 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | 463 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, |
454 | struct btrfs_device *tgtdev); | 464 | struct btrfs_device *tgtdev); |
455 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, | 465 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, |
@@ -513,4 +523,16 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | |||
513 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); | 523 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); |
514 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, | 524 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, |
515 | struct btrfs_transaction *transaction); | 525 | struct btrfs_transaction *transaction); |
526 | |||
527 | static inline void lock_chunks(struct btrfs_root *root) | ||
528 | { | ||
529 | mutex_lock(&root->fs_info->chunk_mutex); | ||
530 | } | ||
531 | |||
532 | static inline void unlock_chunks(struct btrfs_root *root) | ||
533 | { | ||
534 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
535 | } | ||
536 | |||
537 | |||
516 | #endif | 538 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index dcf20131fbe4..47b19465f0dc 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xattr.h" | 29 | #include "xattr.h" |
30 | #include "disk-io.h" | 30 | #include "disk-io.h" |
31 | #include "props.h" | 31 | #include "props.h" |
32 | #include "locking.h" | ||
32 | 33 | ||
33 | 34 | ||
34 | ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | 35 | ssize_t __btrfs_getxattr(struct inode *inode, const char *name, |
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
91 | struct inode *inode, const char *name, | 92 | struct inode *inode, const char *name, |
92 | const void *value, size_t size, int flags) | 93 | const void *value, size_t size, int flags) |
93 | { | 94 | { |
94 | struct btrfs_dir_item *di; | 95 | struct btrfs_dir_item *di = NULL; |
95 | struct btrfs_root *root = BTRFS_I(inode)->root; | 96 | struct btrfs_root *root = BTRFS_I(inode)->root; |
96 | struct btrfs_path *path; | 97 | struct btrfs_path *path; |
97 | size_t name_len = strlen(name); | 98 | size_t name_len = strlen(name); |
@@ -103,84 +104,119 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
103 | path = btrfs_alloc_path(); | 104 | path = btrfs_alloc_path(); |
104 | if (!path) | 105 | if (!path) |
105 | return -ENOMEM; | 106 | return -ENOMEM; |
107 | path->skip_release_on_error = 1; | ||
108 | |||
109 | if (!value) { | ||
110 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), | ||
111 | name, name_len, -1); | ||
112 | if (!di && (flags & XATTR_REPLACE)) | ||
113 | ret = -ENODATA; | ||
114 | else if (di) | ||
115 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
116 | goto out; | ||
117 | } | ||
106 | 118 | ||
119 | /* | ||
120 | * For a replace we can't just do the insert blindly. | ||
121 | * Do a lookup first (read-only btrfs_search_slot), and return if xattr | ||
122 | * doesn't exist. If it exists, fall down below to the insert/replace | ||
123 | * path - we can't race with a concurrent xattr delete, because the VFS | ||
124 | * locks the inode's i_mutex before calling setxattr or removexattr. | ||
125 | */ | ||
107 | if (flags & XATTR_REPLACE) { | 126 | if (flags & XATTR_REPLACE) { |
108 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, | 127 | ASSERT(mutex_is_locked(&inode->i_mutex)); |
109 | name_len, -1); | 128 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), |
110 | if (IS_ERR(di)) { | 129 | name, name_len, 0); |
111 | ret = PTR_ERR(di); | 130 | if (!di) { |
112 | goto out; | ||
113 | } else if (!di) { | ||
114 | ret = -ENODATA; | 131 | ret = -ENODATA; |
115 | goto out; | 132 | goto out; |
116 | } | 133 | } |
117 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
118 | if (ret) | ||
119 | goto out; | ||
120 | btrfs_release_path(path); | 134 | btrfs_release_path(path); |
135 | di = NULL; | ||
136 | } | ||
121 | 137 | ||
138 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
139 | name, name_len, value, size); | ||
140 | if (ret == -EOVERFLOW) { | ||
122 | /* | 141 | /* |
123 | * remove the attribute | 142 | * We have an existing item in a leaf, split_leaf couldn't |
143 | * expand it. That item might have or not a dir_item that | ||
144 | * matches our target xattr, so lets check. | ||
124 | */ | 145 | */ |
125 | if (!value) | 146 | ret = 0; |
126 | goto out; | 147 | btrfs_assert_tree_locked(path->nodes[0]); |
127 | } else { | 148 | di = btrfs_match_dir_item_name(root, path, name, name_len); |
128 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), | 149 | if (!di && !(flags & XATTR_REPLACE)) { |
129 | name, name_len, 0); | 150 | ret = -ENOSPC; |
130 | if (IS_ERR(di)) { | ||
131 | ret = PTR_ERR(di); | ||
132 | goto out; | 151 | goto out; |
133 | } | 152 | } |
134 | if (!di && !value) | 153 | } else if (ret == -EEXIST) { |
135 | goto out; | 154 | ret = 0; |
136 | btrfs_release_path(path); | 155 | di = btrfs_match_dir_item_name(root, path, name, name_len); |
156 | ASSERT(di); /* logic error */ | ||
157 | } else if (ret) { | ||
158 | goto out; | ||
137 | } | 159 | } |
138 | 160 | ||
139 | again: | 161 | if (di && (flags & XATTR_CREATE)) { |
140 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
141 | name, name_len, value, size); | ||
142 | /* | ||
143 | * If we're setting an xattr to a new value but the new value is say | ||
144 | * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting | ||
145 | * back from split_leaf. This is because it thinks we'll be extending | ||
146 | * the existing item size, but we're asking for enough space to add the | ||
147 | * item itself. So if we get EOVERFLOW just set ret to EEXIST and let | ||
148 | * the rest of the function figure it out. | ||
149 | */ | ||
150 | if (ret == -EOVERFLOW) | ||
151 | ret = -EEXIST; | 162 | ret = -EEXIST; |
163 | goto out; | ||
164 | } | ||
152 | 165 | ||
153 | if (ret == -EEXIST) { | 166 | if (di) { |
154 | if (flags & XATTR_CREATE) | ||
155 | goto out; | ||
156 | /* | 167 | /* |
157 | * We can't use the path we already have since we won't have the | 168 | * We're doing a replace, and it must be atomic, that is, at |
158 | * proper locking for a delete, so release the path and | 169 | * any point in time we have either the old or the new xattr |
159 | * re-lookup to delete the thing. | 170 | * value in the tree. We don't want readers (getxattr and |
171 | * listxattrs) to miss a value, this is specially important | ||
172 | * for ACLs. | ||
160 | */ | 173 | */ |
161 | btrfs_release_path(path); | 174 | const int slot = path->slots[0]; |
162 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), | 175 | struct extent_buffer *leaf = path->nodes[0]; |
163 | name, name_len, -1); | 176 | const u16 old_data_len = btrfs_dir_data_len(leaf, di); |
164 | if (IS_ERR(di)) { | 177 | const u32 item_size = btrfs_item_size_nr(leaf, slot); |
165 | ret = PTR_ERR(di); | 178 | const u32 data_size = sizeof(*di) + name_len + size; |
166 | goto out; | 179 | struct btrfs_item *item; |
167 | } else if (!di) { | 180 | unsigned long data_ptr; |
168 | /* Shouldn't happen but just in case... */ | 181 | char *ptr; |
169 | btrfs_release_path(path); | 182 | |
170 | goto again; | 183 | if (size > old_data_len) { |
184 | if (btrfs_leaf_free_space(root, leaf) < | ||
185 | (size - old_data_len)) { | ||
186 | ret = -ENOSPC; | ||
187 | goto out; | ||
188 | } | ||
171 | } | 189 | } |
172 | 190 | ||
173 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 191 | if (old_data_len + name_len + sizeof(*di) == item_size) { |
174 | if (ret) | 192 | /* No other xattrs packed in the same leaf item. */ |
175 | goto out; | 193 | if (size > old_data_len) |
194 | btrfs_extend_item(root, path, | ||
195 | size - old_data_len); | ||
196 | else if (size < old_data_len) | ||
197 | btrfs_truncate_item(root, path, data_size, 1); | ||
198 | } else { | ||
199 | /* There are other xattrs packed in the same item. */ | ||
200 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
201 | if (ret) | ||
202 | goto out; | ||
203 | btrfs_extend_item(root, path, data_size); | ||
204 | } | ||
176 | 205 | ||
206 | item = btrfs_item_nr(slot); | ||
207 | ptr = btrfs_item_ptr(leaf, slot, char); | ||
208 | ptr += btrfs_item_size(leaf, item) - data_size; | ||
209 | di = (struct btrfs_dir_item *)ptr; | ||
210 | btrfs_set_dir_data_len(leaf, di, size); | ||
211 | data_ptr = ((unsigned long)(di + 1)) + name_len; | ||
212 | write_extent_buffer(leaf, value, data_ptr, size); | ||
213 | btrfs_mark_buffer_dirty(leaf); | ||
214 | } else { | ||
177 | /* | 215 | /* |
178 | * We have a value to set, so go back and try to insert it now. | 216 | * Insert, and we had space for the xattr, so path->slots[0] is |
217 | * where our xattr dir_item is and btrfs_insert_xattr_item() | ||
218 | * filled it. | ||
179 | */ | 219 | */ |
180 | if (value) { | ||
181 | btrfs_release_path(path); | ||
182 | goto again; | ||
183 | } | ||
184 | } | 220 | } |
185 | out: | 221 | out: |
186 | btrfs_free_path(path); | 222 | btrfs_free_path(path); |
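The rewritten do_setxattr() decides between insert, in-place replace and failure by combining the XATTR_CREATE/XATTR_REPLACE flags with whether a matching dir_item was found. The sketch below only models those flag semantics, which match setxattr(2); the helper is illustrative and not the btrfs code.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/xattr.h>   /* XATTR_CREATE, XATTR_REPLACE */

static int check_setxattr_flags(bool exists, int flags)
{
        if ((flags & XATTR_CREATE) && exists)
                return -EEXIST;
        if ((flags & XATTR_REPLACE) && !exists)
                return -ENODATA;
        return 0;   /* proceed with insert or in-place replace */
}

int main(void)
{
        printf("%d\n", check_setxattr_flags(true, XATTR_CREATE));   /* -EEXIST */
        printf("%d\n", check_setxattr_flags(false, XATTR_REPLACE)); /* -ENODATA */
        printf("%d\n", check_setxattr_flags(true, XATTR_REPLACE));  /* 0 */
        return 0;
}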
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index db3f772e57ae..a75fba67bb1f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -158,17 +158,8 @@ struct ext4_allocation_request { | |||
158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) | 158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) |
159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) | 159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) |
160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) | 160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) |
161 | /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of | ||
162 | * ext4_map_blocks wants to know whether or not the underlying cluster has | ||
163 | * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that | ||
164 | * the requested mapping was from previously mapped (or delayed allocated) | ||
165 | * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster | ||
166 | * should never appear on buffer_head's state flags. | ||
167 | */ | ||
168 | #define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster) | ||
169 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ | 161 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ |
170 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ | 162 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) |
171 | EXT4_MAP_FROM_CLUSTER) | ||
172 | 163 | ||
173 | struct ext4_map_blocks { | 164 | struct ext4_map_blocks { |
174 | ext4_fsblk_t m_pblk; | 165 | ext4_fsblk_t m_pblk; |
@@ -565,10 +556,8 @@ enum { | |||
565 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | 556 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 |
566 | /* Do not take i_data_sem locking in ext4_map_blocks */ | 557 | /* Do not take i_data_sem locking in ext4_map_blocks */ |
567 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | 558 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 |
568 | /* Do not put hole in extent cache */ | ||
569 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 | ||
570 | /* Convert written extents to unwritten */ | 559 | /* Convert written extents to unwritten */ |
571 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 | 560 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 |
572 | 561 | ||
573 | /* | 562 | /* |
574 | * The bit position of these flags must not overlap with any of the | 563 | * The bit position of these flags must not overlap with any of the |
@@ -889,10 +878,12 @@ struct ext4_inode_info { | |||
889 | /* extents status tree */ | 878 | /* extents status tree */ |
890 | struct ext4_es_tree i_es_tree; | 879 | struct ext4_es_tree i_es_tree; |
891 | rwlock_t i_es_lock; | 880 | rwlock_t i_es_lock; |
892 | struct list_head i_es_lru; | 881 | struct list_head i_es_list; |
893 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | 882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ |
894 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 883 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ |
895 | unsigned long i_touch_when; /* jiffies of last accessing */ | 884 | ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for |
885 | extents to shrink. Protected by | ||
886 | i_es_lock */ | ||
896 | 887 | ||
897 | /* ialloc */ | 888 | /* ialloc */ |
898 | ext4_group_t i_last_alloc_group; | 889 | ext4_group_t i_last_alloc_group; |
@@ -1337,10 +1328,11 @@ struct ext4_sb_info { | |||
1337 | 1328 | ||
1338 | /* Reclaim extents from extent status tree */ | 1329 | /* Reclaim extents from extent status tree */ |
1339 | struct shrinker s_es_shrinker; | 1330 | struct shrinker s_es_shrinker; |
1340 | struct list_head s_es_lru; | 1331 | struct list_head s_es_list; /* List of inodes with reclaimable extents */ |
1332 | long s_es_nr_inode; | ||
1341 | struct ext4_es_stats s_es_stats; | 1333 | struct ext4_es_stats s_es_stats; |
1342 | struct mb_cache *s_mb_cache; | 1334 | struct mb_cache *s_mb_cache; |
1343 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1335 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; |
1344 | 1336 | ||
1345 | /* Ratelimit ext4 messages. */ | 1337 | /* Ratelimit ext4 messages. */ |
1346 | struct ratelimit_state s_err_ratelimit_state; | 1338 | struct ratelimit_state s_err_ratelimit_state; |
@@ -2196,7 +2188,6 @@ extern int ext4_calculate_overhead(struct super_block *sb); | |||
2196 | extern void ext4_superblock_csum_set(struct super_block *sb); | 2188 | extern void ext4_superblock_csum_set(struct super_block *sb); |
2197 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2189 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
2198 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2190 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
2199 | extern void ext4_kvfree(void *ptr); | ||
2200 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, | 2191 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, |
2201 | ext4_group_t ngroup); | 2192 | ext4_group_t ngroup); |
2202 | extern const char *ext4_decode_error(struct super_block *sb, int errno, | 2193 | extern const char *ext4_decode_error(struct super_block *sb, int errno, |
@@ -2647,7 +2638,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, | |||
2647 | int *retval); | 2638 | int *retval); |
2648 | extern int ext4_inline_data_fiemap(struct inode *inode, | 2639 | extern int ext4_inline_data_fiemap(struct inode *inode, |
2649 | struct fiemap_extent_info *fieinfo, | 2640 | struct fiemap_extent_info *fieinfo, |
2650 | int *has_inline); | 2641 | int *has_inline, __u64 start, __u64 len); |
2651 | extern int ext4_try_to_evict_inline_data(handle_t *handle, | 2642 | extern int ext4_try_to_evict_inline_data(handle_t *handle, |
2652 | struct inode *inode, | 2643 | struct inode *inode, |
2653 | int needed); | 2644 | int needed); |
@@ -2795,16 +2786,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, | |||
2795 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | 2786 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); |
2796 | 2787 | ||
2797 | /* | 2788 | /* |
2798 | * Note that these flags will never ever appear in a buffer_head's state flag. | ||
2799 | * See EXT4_MAP_... to see where this is used. | ||
2800 | */ | ||
2801 | enum ext4_state_bits { | ||
2802 | BH_AllocFromCluster /* allocated blocks were part of already | ||
2803 | * allocated cluster. */ | ||
2804 | = BH_JBDPrivateStart | ||
2805 | }; | ||
2806 | |||
2807 | /* | ||
2808 | * Add new method to test whether block and inode bitmaps are properly | 2789 | * Add new method to test whether block and inode bitmaps are properly |
2809 | * initialized. With uninit_bg reading the block from disk is not enough | 2790 | * initialized. With uninit_bg reading the block from disk is not enough |
2810 | * to mark the bitmap uptodate. We need to also zero-out the bitmap | 2791 | * to mark the bitmap uptodate. We need to also zero-out the bitmap |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0b16fb4c06d3..e5d3eadf47b1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2306,16 +2306,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2306 | ext4_lblk_t block) | 2306 | ext4_lblk_t block) |
2307 | { | 2307 | { |
2308 | int depth = ext_depth(inode); | 2308 | int depth = ext_depth(inode); |
2309 | unsigned long len = 0; | 2309 | ext4_lblk_t len; |
2310 | ext4_lblk_t lblock = 0; | 2310 | ext4_lblk_t lblock; |
2311 | struct ext4_extent *ex; | 2311 | struct ext4_extent *ex; |
2312 | struct extent_status es; | ||
2312 | 2313 | ||
2313 | ex = path[depth].p_ext; | 2314 | ex = path[depth].p_ext; |
2314 | if (ex == NULL) { | 2315 | if (ex == NULL) { |
2315 | /* | 2316 | /* there is no extent yet, so gap is [0;-] */ |
2316 | * there is no extent yet, so gap is [0;-] and we | 2317 | lblock = 0; |
2317 | * don't cache it | 2318 | len = EXT_MAX_BLOCKS; |
2318 | */ | ||
2319 | ext_debug("cache gap(whole file):"); | 2319 | ext_debug("cache gap(whole file):"); |
2320 | } else if (block < le32_to_cpu(ex->ee_block)) { | 2320 | } else if (block < le32_to_cpu(ex->ee_block)) { |
2321 | lblock = block; | 2321 | lblock = block; |
@@ -2324,9 +2324,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2324 | block, | 2324 | block, |
2325 | le32_to_cpu(ex->ee_block), | 2325 | le32_to_cpu(ex->ee_block), |
2326 | ext4_ext_get_actual_len(ex)); | 2326 | ext4_ext_get_actual_len(ex)); |
2327 | if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) | ||
2328 | ext4_es_insert_extent(inode, lblock, len, ~0, | ||
2329 | EXTENT_STATUS_HOLE); | ||
2330 | } else if (block >= le32_to_cpu(ex->ee_block) | 2327 | } else if (block >= le32_to_cpu(ex->ee_block) |
2331 | + ext4_ext_get_actual_len(ex)) { | 2328 | + ext4_ext_get_actual_len(ex)) { |
2332 | ext4_lblk_t next; | 2329 | ext4_lblk_t next; |
@@ -2340,14 +2337,19 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2340 | block); | 2337 | block); |
2341 | BUG_ON(next == lblock); | 2338 | BUG_ON(next == lblock); |
2342 | len = next - lblock; | 2339 | len = next - lblock; |
2343 | if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1)) | ||
2344 | ext4_es_insert_extent(inode, lblock, len, ~0, | ||
2345 | EXTENT_STATUS_HOLE); | ||
2346 | } else { | 2340 | } else { |
2347 | BUG(); | 2341 | BUG(); |
2348 | } | 2342 | } |
2349 | 2343 | ||
2350 | ext_debug(" -> %u:%lu\n", lblock, len); | 2344 | ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es); |
2345 | if (es.es_len) { | ||
2346 | /* Is there a delayed extent containing lblock? */ | ||
2347 | if (es.es_lblk <= lblock) | ||
2348 | return; | ||
2349 | len = min(es.es_lblk - lblock, len); | ||
2350 | } | ||
2351 | ext_debug(" -> %u:%u\n", lblock, len); | ||
2352 | ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE); | ||
2351 | } | 2353 | } |
2352 | 2354 | ||
2353 | /* | 2355 | /* |
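Rather than skipping the cache whenever a delalloc block lies in the range, ext4_ext_put_gap_in_cache() above now asks the extent status tree for the next delayed extent and trims the hole so it ends where that extent begins, caching nothing if the block is already covered. A small sketch of that trimming rule, with made-up types in place of the ext4 extent_status structures:

#include <stdio.h>

struct delayed { unsigned int lblk, len; };   /* es_lblk/es_len stand-in */

/* Return how many blocks of the hole [lblk, lblk + len) may be cached. */
static unsigned int trim_hole(unsigned int lblk, unsigned int len,
                              const struct delayed *es)
{
        if (es && es->len) {
                if (es->lblk <= lblk)
                        return 0;              /* delayed data already covers lblk */
                if (es->lblk - lblk < len)
                        len = es->lblk - lblk; /* stop at the delayed extent */
        }
        return len;
}

int main(void)
{
        struct delayed es = { .lblk = 100, .len = 8 };

        printf("%u\n", trim_hole(90, 50, &es));   /* 10: trimmed at block 100 */
        printf("%u\n", trim_hole(104, 50, &es));  /* 0: block is delayed */
        printf("%u\n", trim_hole(90, 50, NULL));  /* 50: no delayed extent */
        return 0;
}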
@@ -2481,7 +2483,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2481 | ext4_lblk_t from, ext4_lblk_t to) | 2483 | ext4_lblk_t from, ext4_lblk_t to) |
2482 | { | 2484 | { |
2483 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2485 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2484 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2486 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2485 | ext4_fsblk_t pblk; | 2487 | ext4_fsblk_t pblk; |
2486 | int flags = get_default_free_blocks_flags(inode); | 2488 | int flags = get_default_free_blocks_flags(inode); |
2487 | 2489 | ||
@@ -2490,7 +2492,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2490 | * at the beginning of the extent. Instead, we make a note | 2492 | * at the beginning of the extent. Instead, we make a note |
2491 | * that we tried freeing the cluster, and check to see if we | 2493 | * that we tried freeing the cluster, and check to see if we |
2492 | * need to free it on a subsequent call to ext4_remove_blocks, | 2494 | * need to free it on a subsequent call to ext4_remove_blocks, |
2493 | * or at the end of the ext4_truncate() operation. | 2495 | * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space. |
2494 | */ | 2496 | */ |
2495 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; | 2497 | flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER; |
2496 | 2498 | ||
@@ -2501,8 +2503,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2501 | * partial cluster here. | 2503 | * partial cluster here. |
2502 | */ | 2504 | */ |
2503 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | 2505 | pblk = ext4_ext_pblock(ex) + ee_len - 1; |
2504 | if ((*partial_cluster > 0) && | 2506 | if (*partial_cluster > 0 && |
2505 | (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | 2507 | *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { |
2506 | ext4_free_blocks(handle, inode, NULL, | 2508 | ext4_free_blocks(handle, inode, NULL, |
2507 | EXT4_C2B(sbi, *partial_cluster), | 2509 | EXT4_C2B(sbi, *partial_cluster), |
2508 | sbi->s_cluster_ratio, flags); | 2510 | sbi->s_cluster_ratio, flags); |
@@ -2528,7 +2530,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2528 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2530 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2529 | /* tail removal */ | 2531 | /* tail removal */ |
2530 | ext4_lblk_t num; | 2532 | ext4_lblk_t num; |
2531 | unsigned int unaligned; | 2533 | long long first_cluster; |
2532 | 2534 | ||
2533 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2535 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2534 | pblk = ext4_ext_pblock(ex) + ee_len - num; | 2536 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
@@ -2538,7 +2540,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2538 | * used by any other extent (partial_cluster is negative). | 2540 | * used by any other extent (partial_cluster is negative). |
2539 | */ | 2541 | */ |
2540 | if (*partial_cluster < 0 && | 2542 | if (*partial_cluster < 0 && |
2541 | -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) | 2543 | *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1)) |
2542 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; | 2544 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; |
2543 | 2545 | ||
2544 | ext_debug("free last %u blocks starting %llu partial %lld\n", | 2546 | ext_debug("free last %u blocks starting %llu partial %lld\n", |
@@ -2549,21 +2551,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2549 | * beginning of a cluster, and we removed the entire | 2551 | * beginning of a cluster, and we removed the entire |
2550 | * extent and the cluster is not used by any other extent, | 2552 | * extent and the cluster is not used by any other extent, |
2551 | * save the partial cluster here, since we might need to | 2553 | * save the partial cluster here, since we might need to |
2552 | * delete if we determine that the truncate operation has | 2554 | * delete if we determine that the truncate or punch hole |
2553 | * removed all of the blocks in the cluster. | 2555 | * operation has removed all of the blocks in the cluster. |
2556 | * If that cluster is used by another extent, preserve its | ||
2557 | * negative value so it isn't freed later on. | ||
2554 | * | 2558 | * |
2555 | * On the other hand, if we did not manage to free the whole | 2559 | * If the whole extent wasn't freed, we've reached the |
2556 | * extent, we have to mark the cluster as used (store negative | 2560 | * start of the truncated/punched region and have finished |
2557 | * cluster number in partial_cluster). | 2561 | * removing blocks. If there's a partial cluster here it's |
2562 | * shared with the remainder of the extent and is no longer | ||
2563 | * a candidate for removal. | ||
2558 | */ | 2564 | */ |
2559 | unaligned = EXT4_PBLK_COFF(sbi, pblk); | 2565 | if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) { |
2560 | if (unaligned && (ee_len == num) && | 2566 | first_cluster = (long long) EXT4_B2C(sbi, pblk); |
2561 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | 2567 | if (first_cluster != -*partial_cluster) |
2562 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2568 | *partial_cluster = first_cluster; |
2563 | else if (unaligned) | 2569 | } else { |
2564 | *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); | ||
2565 | else if (*partial_cluster > 0) | ||
2566 | *partial_cluster = 0; | 2570 | *partial_cluster = 0; |
2571 | } | ||
2567 | } else | 2572 | } else |
2568 | ext4_error(sbi->s_sb, "strange request: removal(2) " | 2573 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
2569 | "%u-%u from %u:%u\n", | 2574 | "%u-%u from %u:%u\n", |
@@ -2574,15 +2579,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2574 | 2579 | ||
2575 | /* | 2580 | /* |
2576 | * ext4_ext_rm_leaf() Removes the extents associated with the | 2581 | * ext4_ext_rm_leaf() Removes the extents associated with the |
2577 | * blocks appearing between "start" and "end", and splits the extents | 2582 | * blocks appearing between "start" and "end". Both "start" |
2578 | * if "start" and "end" appear in the same extent | 2583 | * and "end" must appear in the same extent or EIO is returned. |
2579 | * | 2584 | * |
2580 | * @handle: The journal handle | 2585 | * @handle: The journal handle |
2581 | * @inode: The files inode | 2586 | * @inode: The files inode |
2582 | * @path: The path to the leaf | 2587 | * @path: The path to the leaf |
2583 | * @partial_cluster: The cluster which we'll have to free if all extents | 2588 | * @partial_cluster: The cluster which we'll have to free if all extents |
2584 | * has been released from it. It gets negative in case | 2589 | * has been released from it. However, if this value is |
2585 | * that the cluster is still used. | 2590 | * negative, it's a cluster just to the right of the |
2591 | * punched region and it must not be freed. | ||
2586 | * @start: The first block to remove | 2592 | * @start: The first block to remove |
2587 | * @end: The last block to remove | 2593 | * @end: The last block to remove |
2588 | */ | 2594 | */ |
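The updated kernel-doc spells out the partial_cluster sign convention used throughout these hunks: a positive value names a cluster that may still have to be freed once all its extents are gone, while the negative of a cluster number marks that cluster as shared (just to the right of the punched region) so it must never be freed. A toy illustration of that convention; the helpers below are invented for the example, not ext4 code.

#include <stdbool.h>
#include <stdio.h>

static void mark_candidate(long long *partial, unsigned long long cluster)
{
        *partial = (long long)cluster;        /* may be freed later */
}

static void mark_in_use(long long *partial, unsigned long long cluster)
{
        *partial = -(long long)cluster;       /* shared: must be preserved */
}

static bool may_free(long long partial, unsigned long long cluster)
{
        return partial > 0 && (unsigned long long)partial == cluster;
}

int main(void)
{
        long long partial = 0;

        mark_candidate(&partial, 42);
        printf("%d\n", may_free(partial, 42));    /* 1 */
        mark_in_use(&partial, 42);
        printf("%d\n", may_free(partial, 42));    /* 0 */
        return 0;
}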
@@ -2621,27 +2627,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2621 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2627 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2622 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2628 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2623 | 2629 | ||
2624 | /* | ||
2625 | * If we're starting with an extent other than the last one in the | ||
2626 | * node, we need to see if it shares a cluster with the extent to | ||
2627 | * the right (towards the end of the file). If its leftmost cluster | ||
2628 | * is this extent's rightmost cluster and it is not cluster aligned, | ||
2629 | * we'll mark it as a partial that is not to be deallocated. | ||
2630 | */ | ||
2631 | |||
2632 | if (ex != EXT_LAST_EXTENT(eh)) { | ||
2633 | ext4_fsblk_t current_pblk, right_pblk; | ||
2634 | long long current_cluster, right_cluster; | ||
2635 | |||
2636 | current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; | ||
2637 | current_cluster = (long long)EXT4_B2C(sbi, current_pblk); | ||
2638 | right_pblk = ext4_ext_pblock(ex + 1); | ||
2639 | right_cluster = (long long)EXT4_B2C(sbi, right_pblk); | ||
2640 | if (current_cluster == right_cluster && | ||
2641 | EXT4_PBLK_COFF(sbi, right_pblk)) | ||
2642 | *partial_cluster = -right_cluster; | ||
2643 | } | ||
2644 | |||
2645 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | 2630 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); |
2646 | 2631 | ||
2647 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2632 | while (ex >= EXT_FIRST_EXTENT(eh) && |
@@ -2666,14 +2651,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2666 | if (end < ex_ee_block) { | 2651 | if (end < ex_ee_block) { |
2667 | /* | 2652 | /* |
2668 | * We're going to skip this extent and move to another, | 2653 | * We're going to skip this extent and move to another, |
2669 | * so if this extent is not cluster aligned we have | 2654 | * so note that its first cluster is in use to avoid |
2670 | * to mark the current cluster as used to avoid | 2655 | * freeing it when removing blocks. Eventually, the |
2671 | * accidentally freeing it later on | 2656 | * right edge of the truncated/punched region will |
2657 | * be just to the left. | ||
2672 | */ | 2658 | */ |
2673 | pblk = ext4_ext_pblock(ex); | 2659 | if (sbi->s_cluster_ratio > 1) { |
2674 | if (EXT4_PBLK_COFF(sbi, pblk)) | 2660 | pblk = ext4_ext_pblock(ex); |
2675 | *partial_cluster = | 2661 | *partial_cluster = |
2676 | -((long long)EXT4_B2C(sbi, pblk)); | 2662 | -(long long) EXT4_B2C(sbi, pblk); |
2663 | } | ||
2677 | ex--; | 2664 | ex--; |
2678 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2665 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2679 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2666 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2749,8 +2736,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2749 | sizeof(struct ext4_extent)); | 2736 | sizeof(struct ext4_extent)); |
2750 | } | 2737 | } |
2751 | le16_add_cpu(&eh->eh_entries, -1); | 2738 | le16_add_cpu(&eh->eh_entries, -1); |
2752 | } else if (*partial_cluster > 0) | 2739 | } |
2753 | *partial_cluster = 0; | ||
2754 | 2740 | ||
2755 | err = ext4_ext_dirty(handle, inode, path + depth); | 2741 | err = ext4_ext_dirty(handle, inode, path + depth); |
2756 | if (err) | 2742 | if (err) |
@@ -2769,20 +2755,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2769 | /* | 2755 | /* |
2770 | * If there's a partial cluster and at least one extent remains in | 2756 | * If there's a partial cluster and at least one extent remains in |
2771 | * the leaf, free the partial cluster if it isn't shared with the | 2757 | * the leaf, free the partial cluster if it isn't shared with the |
2772 | * current extent. If there's a partial cluster and no extents | 2758 | * current extent. If it is shared with the current extent |
2773 | * remain in the leaf, it can't be freed here. It can only be | 2759 | * we zero partial_cluster because we've reached the start of the |
2774 | * freed when it's possible to determine if it's not shared with | 2760 | * truncated/punched region and we're done removing blocks. |
2775 | * any other extent - when the next leaf is processed or when space | ||
2776 | * removal is complete. | ||
2777 | */ | 2761 | */ |
2778 | if (*partial_cluster > 0 && eh->eh_entries && | 2762 | if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) { |
2779 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2763 | pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; |
2780 | *partial_cluster)) { | 2764 | if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) { |
2781 | int flags = get_default_free_blocks_flags(inode); | 2765 | ext4_free_blocks(handle, inode, NULL, |
2782 | 2766 | EXT4_C2B(sbi, *partial_cluster), | |
2783 | ext4_free_blocks(handle, inode, NULL, | 2767 | sbi->s_cluster_ratio, |
2784 | EXT4_C2B(sbi, *partial_cluster), | 2768 | get_default_free_blocks_flags(inode)); |
2785 | sbi->s_cluster_ratio, flags); | 2769 | } |
2786 | *partial_cluster = 0; | 2770 | *partial_cluster = 0; |
2787 | } | 2771 | } |
2788 | 2772 | ||
@@ -2819,7 +2803,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2819 | int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | 2803 | int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2820 | ext4_lblk_t end) | 2804 | ext4_lblk_t end) |
2821 | { | 2805 | { |
2822 | struct super_block *sb = inode->i_sb; | 2806 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2823 | int depth = ext_depth(inode); | 2807 | int depth = ext_depth(inode); |
2824 | struct ext4_ext_path *path = NULL; | 2808 | struct ext4_ext_path *path = NULL; |
2825 | long long partial_cluster = 0; | 2809 | long long partial_cluster = 0; |
@@ -2845,9 +2829,10 @@ again: | |||
2845 | */ | 2829 | */ |
2846 | if (end < EXT_MAX_BLOCKS - 1) { | 2830 | if (end < EXT_MAX_BLOCKS - 1) { |
2847 | struct ext4_extent *ex; | 2831 | struct ext4_extent *ex; |
2848 | ext4_lblk_t ee_block; | 2832 | ext4_lblk_t ee_block, ex_end, lblk; |
2833 | ext4_fsblk_t pblk; | ||
2849 | 2834 | ||
2850 | /* find extent for this block */ | 2835 | /* find extent for or closest extent to this block */ |
2851 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); | 2836 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); |
2852 | if (IS_ERR(path)) { | 2837 | if (IS_ERR(path)) { |
2853 | ext4_journal_stop(handle); | 2838 | ext4_journal_stop(handle); |
@@ -2867,6 +2852,7 @@ again: | |||
2867 | } | 2852 | } |
2868 | 2853 | ||
2869 | ee_block = le32_to_cpu(ex->ee_block); | 2854 | ee_block = le32_to_cpu(ex->ee_block); |
2855 | ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1; | ||
2870 | 2856 | ||
2871 | /* | 2857 | /* |
2872 | * See if the last block is inside the extent, if so split | 2858 | * See if the last block is inside the extent, if so split |
@@ -2874,8 +2860,19 @@ again: | |||
2874 | * tail of the first part of the split extent in | 2860 | * tail of the first part of the split extent in |
2875 | * ext4_ext_rm_leaf(). | 2861 | * ext4_ext_rm_leaf(). |
2876 | */ | 2862 | */ |
2877 | if (end >= ee_block && | 2863 | if (end >= ee_block && end < ex_end) { |
2878 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | 2864 | |
2865 | /* | ||
2866 | * If we're going to split the extent, note that | ||
2867 | * the cluster containing the block after 'end' is | ||
2868 | * in use to avoid freeing it when removing blocks. | ||
2869 | */ | ||
2870 | if (sbi->s_cluster_ratio > 1) { | ||
2871 | pblk = ext4_ext_pblock(ex) + end - ee_block + 2; | ||
2872 | partial_cluster = | ||
2873 | -(long long) EXT4_B2C(sbi, pblk); | ||
2874 | } | ||
2875 | |||
2879 | /* | 2876 | /* |
2880 | * Split the extent in two so that 'end' is the last | 2877 | * Split the extent in two so that 'end' is the last |
2881 | * block in the first new extent. Also we should not | 2878 | * block in the first new extent. Also we should not |
@@ -2886,6 +2883,24 @@ again: | |||
2886 | end + 1, 1); | 2883 | end + 1, 1); |
2887 | if (err < 0) | 2884 | if (err < 0) |
2888 | goto out; | 2885 | goto out; |
2886 | |||
2887 | } else if (sbi->s_cluster_ratio > 1 && end >= ex_end) { | ||
2888 | /* | ||
2889 | * If there's an extent to the right its first cluster | ||
2890 | * contains the immediate right boundary of the | ||
2891 | * truncated/punched region. Set partial_cluster to | ||
2892 | * its negative value so it won't be freed if shared | ||
2893 | * with the current extent. The end < ee_block case | ||
2894 | * is handled in ext4_ext_rm_leaf(). | ||
2895 | */ | ||
2896 | lblk = ex_end + 1; | ||
2897 | err = ext4_ext_search_right(inode, path, &lblk, &pblk, | ||
2898 | &ex); | ||
2899 | if (err) | ||
2900 | goto out; | ||
2901 | if (pblk) | ||
2902 | partial_cluster = | ||
2903 | -(long long) EXT4_B2C(sbi, pblk); | ||
2889 | } | 2904 | } |
2890 | } | 2905 | } |
2891 | /* | 2906 | /* |
@@ -2996,16 +3011,18 @@ again: | |||
2996 | trace_ext4_ext_remove_space_done(inode, start, end, depth, | 3011 | trace_ext4_ext_remove_space_done(inode, start, end, depth, |
2997 | partial_cluster, path->p_hdr->eh_entries); | 3012 | partial_cluster, path->p_hdr->eh_entries); |
2998 | 3013 | ||
2999 | /* If we still have something in the partial cluster and we have removed | 3014 | /* |
3015 | * If we still have something in the partial cluster and we have removed | ||
3000 | * even the first extent, then we should free the blocks in the partial | 3016 | * even the first extent, then we should free the blocks in the partial |
3001 | * cluster as well. */ | 3017 | * cluster as well. (This code will only run when there are no leaves |
3002 | if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { | 3018 | * to the immediate left of the truncated/punched region.) |
3003 | int flags = get_default_free_blocks_flags(inode); | 3019 | */ |
3004 | 3020 | if (partial_cluster > 0 && err == 0) { | |
3021 | /* don't zero partial_cluster since it's not used afterwards */ | ||
3005 | ext4_free_blocks(handle, inode, NULL, | 3022 | ext4_free_blocks(handle, inode, NULL, |
3006 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | 3023 | EXT4_C2B(sbi, partial_cluster), |
3007 | EXT4_SB(sb)->s_cluster_ratio, flags); | 3024 | sbi->s_cluster_ratio, |
3008 | partial_cluster = 0; | 3025 | get_default_free_blocks_flags(inode)); |
3009 | } | 3026 | } |
3010 | 3027 | ||
3011 | /* TODO: flexible tree reduction should be here */ | 3028 | /* TODO: flexible tree reduction should be here */ |
@@ -4267,6 +4284,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4267 | ext4_io_end_t *io = ext4_inode_aio(inode); | 4284 | ext4_io_end_t *io = ext4_inode_aio(inode); |
4268 | ext4_lblk_t cluster_offset; | 4285 | ext4_lblk_t cluster_offset; |
4269 | int set_unwritten = 0; | 4286 | int set_unwritten = 0; |
4287 | bool map_from_cluster = false; | ||
4270 | 4288 | ||
4271 | ext_debug("blocks %u/%u requested for inode %lu\n", | 4289 | ext_debug("blocks %u/%u requested for inode %lu\n", |
4272 | map->m_lblk, map->m_len, inode->i_ino); | 4290 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -4343,10 +4361,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4343 | } | 4361 | } |
4344 | } | 4362 | } |
4345 | 4363 | ||
4346 | if ((sbi->s_cluster_ratio > 1) && | ||
4347 | ext4_find_delalloc_cluster(inode, map->m_lblk)) | ||
4348 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
4349 | |||
4350 | /* | 4364 | /* |
4351 | * requested block isn't allocated yet; | 4365 | * requested block isn't allocated yet; |
4352 | * we couldn't try to create block if create flag is zero | 4366 | * we couldn't try to create block if create flag is zero |
@@ -4356,15 +4370,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4356 | * put just found gap into cache to speed up | 4370 | * put just found gap into cache to speed up |
4357 | * subsequent requests | 4371 | * subsequent requests |
4358 | */ | 4372 | */ |
4359 | if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0) | 4373 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
4360 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); | ||
4361 | goto out2; | 4374 | goto out2; |
4362 | } | 4375 | } |
4363 | 4376 | ||
4364 | /* | 4377 | /* |
4365 | * Okay, we need to do block allocation. | 4378 | * Okay, we need to do block allocation. |
4366 | */ | 4379 | */ |
4367 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
4368 | newex.ee_block = cpu_to_le32(map->m_lblk); | 4380 | newex.ee_block = cpu_to_le32(map->m_lblk); |
4369 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); | 4381 | cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); |
4370 | 4382 | ||
@@ -4376,7 +4388,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4376 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | 4388 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { |
4377 | ar.len = allocated = map->m_len; | 4389 | ar.len = allocated = map->m_len; |
4378 | newblock = map->m_pblk; | 4390 | newblock = map->m_pblk; |
4379 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | 4391 | map_from_cluster = true; |
4380 | goto got_allocated_blocks; | 4392 | goto got_allocated_blocks; |
4381 | } | 4393 | } |
4382 | 4394 | ||
@@ -4397,7 +4409,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4397 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { | 4409 | get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { |
4398 | ar.len = allocated = map->m_len; | 4410 | ar.len = allocated = map->m_len; |
4399 | newblock = map->m_pblk; | 4411 | newblock = map->m_pblk; |
4400 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | 4412 | map_from_cluster = true; |
4401 | goto got_allocated_blocks; | 4413 | goto got_allocated_blocks; |
4402 | } | 4414 | } |
4403 | 4415 | ||
@@ -4523,7 +4535,7 @@ got_allocated_blocks: | |||
4523 | */ | 4535 | */ |
4524 | reserved_clusters = get_reserved_cluster_alloc(inode, | 4536 | reserved_clusters = get_reserved_cluster_alloc(inode, |
4525 | map->m_lblk, allocated); | 4537 | map->m_lblk, allocated); |
4526 | if (map->m_flags & EXT4_MAP_FROM_CLUSTER) { | 4538 | if (map_from_cluster) { |
4527 | if (reserved_clusters) { | 4539 | if (reserved_clusters) { |
4528 | /* | 4540 | /* |
4529 | * We have clusters reserved for this range. | 4541 | * We have clusters reserved for this range. |
@@ -4620,7 +4632,6 @@ out2: | |||
4620 | 4632 | ||
4621 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4622 | err ? err : allocated); | 4634 | err ? err : allocated); |
4623 | ext4_es_lru_add(inode); | ||
4624 | return err ? err : allocated; | 4635 | return err ? err : allocated; |
4625 | } | 4636 | } |
4626 | 4637 | ||
@@ -5140,7 +5151,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5140 | if (ext4_has_inline_data(inode)) { | 5151 | if (ext4_has_inline_data(inode)) { |
5141 | int has_inline = 1; | 5152 | int has_inline = 1; |
5142 | 5153 | ||
5143 | error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline); | 5154 | error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline, |
5155 | start, len); | ||
5144 | 5156 | ||
5145 | if (has_inline) | 5157 | if (has_inline) |
5146 | return error; | 5158 | return error; |
@@ -5154,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5154 | 5166 | ||
5155 | /* fallback to generic here if not in extents fmt */ | 5167 | /* fallback to generic here if not in extents fmt */ |
5156 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 5168 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5157 | return generic_block_fiemap(inode, fieinfo, start, len, | 5169 | return __generic_block_fiemap(inode, fieinfo, start, len, |
5158 | ext4_get_block); | 5170 | ext4_get_block); |
5159 | 5171 | ||
5160 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | 5172 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) |
5161 | return -EBADR; | 5173 | return -EBADR; |
@@ -5179,7 +5191,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5179 | error = ext4_fill_fiemap_extents(inode, start_blk, | 5191 | error = ext4_fill_fiemap_extents(inode, start_blk, |
5180 | len_blks, fieinfo); | 5192 | len_blks, fieinfo); |
5181 | } | 5193 | } |
5182 | ext4_es_lru_add(inode); | ||
5183 | return error; | 5194 | return error; |
5184 | } | 5195 | } |
5185 | 5196 | ||
@@ -5239,8 +5250,6 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | |||
5239 | return -EIO; | 5250 | return -EIO; |
5240 | 5251 | ||
5241 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | 5252 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); |
5242 | if (!ex_last) | ||
5243 | return -EIO; | ||
5244 | 5253 | ||
5245 | err = ext4_access_path(handle, inode, path + depth); | 5254 | err = ext4_access_path(handle, inode, path + depth); |
5246 | if (err) | 5255 | if (err) |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 94e7855ae71b..e04d45733976 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep; | |||
147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); | 147 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes); |
148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | 148 | static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, |
149 | ext4_lblk_t end); | 149 | ext4_lblk_t end); |
150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 150 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); |
151 | int nr_to_scan); | 151 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
152 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 152 | struct ext4_inode_info *locked_ei); |
153 | struct ext4_inode_info *locked_ei); | ||
154 | 153 | ||
155 | int __init ext4_init_es(void) | 154 | int __init ext4_init_es(void) |
156 | { | 155 | { |
@@ -298,6 +297,36 @@ out: | |||
298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 297 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
299 | } | 298 | } |
300 | 299 | ||
300 | static void ext4_es_list_add(struct inode *inode) | ||
301 | { | ||
302 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
303 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
304 | |||
305 | if (!list_empty(&ei->i_es_list)) | ||
306 | return; | ||
307 | |||
308 | spin_lock(&sbi->s_es_lock); | ||
309 | if (list_empty(&ei->i_es_list)) { | ||
310 | list_add_tail(&ei->i_es_list, &sbi->s_es_list); | ||
311 | sbi->s_es_nr_inode++; | ||
312 | } | ||
313 | spin_unlock(&sbi->s_es_lock); | ||
314 | } | ||
315 | |||
316 | static void ext4_es_list_del(struct inode *inode) | ||
317 | { | ||
318 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
319 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
320 | |||
321 | spin_lock(&sbi->s_es_lock); | ||
322 | if (!list_empty(&ei->i_es_list)) { | ||
323 | list_del_init(&ei->i_es_list); | ||
324 | sbi->s_es_nr_inode--; | ||
325 | WARN_ON_ONCE(sbi->s_es_nr_inode < 0); | ||
326 | } | ||
327 | spin_unlock(&sbi->s_es_lock); | ||
328 | } | ||
329 | |||
301 | static struct extent_status * | 330 | static struct extent_status * |
302 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | 331 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, |
303 | ext4_fsblk_t pblk) | 332 | ext4_fsblk_t pblk) |
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
314 | * We don't count delayed extent because we never try to reclaim them | 343 | * We don't count delayed extent because we never try to reclaim them |
315 | */ | 344 | */ |
316 | if (!ext4_es_is_delayed(es)) { | 345 | if (!ext4_es_is_delayed(es)) { |
317 | EXT4_I(inode)->i_es_lru_nr++; | 346 | if (!EXT4_I(inode)->i_es_shk_nr++) |
347 | ext4_es_list_add(inode); | ||
318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> | 348 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
319 | s_es_stats.es_stats_lru_cnt); | 349 | s_es_stats.es_stats_shk_cnt); |
320 | } | 350 | } |
321 | 351 | ||
322 | EXT4_I(inode)->i_es_all_nr++; | 352 | EXT4_I(inode)->i_es_all_nr++; |
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
330 | EXT4_I(inode)->i_es_all_nr--; | 360 | EXT4_I(inode)->i_es_all_nr--; |
331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | 361 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); |
332 | 362 | ||
333 | /* Decrease the lru counter when this es is not delayed */ | 363 | /* Decrease the shrink counter when this es is not delayed */ |
334 | if (!ext4_es_is_delayed(es)) { | 364 | if (!ext4_es_is_delayed(es)) { |
335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 365 | BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); |
336 | EXT4_I(inode)->i_es_lru_nr--; | 366 | if (!--EXT4_I(inode)->i_es_shk_nr) |
367 | ext4_es_list_del(inode); | ||
337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> | 368 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
338 | s_es_stats.es_stats_lru_cnt); | 369 | s_es_stats.es_stats_shk_cnt); |
339 | } | 370 | } |
340 | 371 | ||
341 | kmem_cache_free(ext4_es_cachep, es); | 372 | kmem_cache_free(ext4_es_cachep, es); |
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
351 | static int ext4_es_can_be_merged(struct extent_status *es1, | 382 | static int ext4_es_can_be_merged(struct extent_status *es1, |
352 | struct extent_status *es2) | 383 | struct extent_status *es2) |
353 | { | 384 | { |
354 | if (ext4_es_status(es1) != ext4_es_status(es2)) | 385 | if (ext4_es_type(es1) != ext4_es_type(es2)) |
355 | return 0; | 386 | return 0; |
356 | 387 | ||
357 | if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { | 388 | if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { |
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es) | |||
394 | es1 = rb_entry(node, struct extent_status, rb_node); | 425 | es1 = rb_entry(node, struct extent_status, rb_node); |
395 | if (ext4_es_can_be_merged(es1, es)) { | 426 | if (ext4_es_can_be_merged(es1, es)) { |
396 | es1->es_len += es->es_len; | 427 | es1->es_len += es->es_len; |
428 | if (ext4_es_is_referenced(es)) | ||
429 | ext4_es_set_referenced(es1); | ||
397 | rb_erase(&es->rb_node, &tree->root); | 430 | rb_erase(&es->rb_node, &tree->root); |
398 | ext4_es_free_extent(inode, es); | 431 | ext4_es_free_extent(inode, es); |
399 | es = es1; | 432 | es = es1; |
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) | |||
416 | es1 = rb_entry(node, struct extent_status, rb_node); | 449 | es1 = rb_entry(node, struct extent_status, rb_node); |
417 | if (ext4_es_can_be_merged(es, es1)) { | 450 | if (ext4_es_can_be_merged(es, es1)) { |
418 | es->es_len += es1->es_len; | 451 | es->es_len += es1->es_len; |
452 | if (ext4_es_is_referenced(es1)) | ||
453 | ext4_es_set_referenced(es); | ||
419 | rb_erase(node, &tree->root); | 454 | rb_erase(node, &tree->root); |
420 | ext4_es_free_extent(inode, es1); | 455 | ext4_es_free_extent(inode, es1); |
421 | } | 456 | } |
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
683 | goto error; | 718 | goto error; |
684 | retry: | 719 | retry: |
685 | err = __es_insert_extent(inode, &newes); | 720 | err = __es_insert_extent(inode, &newes); |
686 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 721 | if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), |
687 | EXT4_I(inode))) | 722 | 128, EXT4_I(inode))) |
688 | goto retry; | 723 | goto retry; |
689 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | 724 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) |
690 | err = 0; | 725 | err = 0; |
@@ -782,6 +817,8 @@ out: | |||
782 | es->es_lblk = es1->es_lblk; | 817 | es->es_lblk = es1->es_lblk; |
783 | es->es_len = es1->es_len; | 818 | es->es_len = es1->es_len; |
784 | es->es_pblk = es1->es_pblk; | 819 | es->es_pblk = es1->es_pblk; |
820 | if (!ext4_es_is_referenced(es)) | ||
821 | ext4_es_set_referenced(es); | ||
785 | stats->es_stats_cache_hits++; | 822 | stats->es_stats_cache_hits++; |
786 | } else { | 823 | } else { |
787 | stats->es_stats_cache_misses++; | 824 | stats->es_stats_cache_misses++; |
@@ -841,8 +878,8 @@ retry: | |||
841 | es->es_lblk = orig_es.es_lblk; | 878 | es->es_lblk = orig_es.es_lblk; |
842 | es->es_len = orig_es.es_len; | 879 | es->es_len = orig_es.es_len; |
843 | if ((err == -ENOMEM) && | 880 | if ((err == -ENOMEM) && |
844 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 881 | __es_shrink(EXT4_SB(inode->i_sb), |
845 | EXT4_I(inode))) | 882 | 128, EXT4_I(inode))) |
846 | goto retry; | 883 | goto retry; |
847 | goto out; | 884 | goto out; |
848 | } | 885 | } |
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
914 | end = lblk + len - 1; | 951 | end = lblk + len - 1; |
915 | BUG_ON(end < lblk); | 952 | BUG_ON(end < lblk); |
916 | 953 | ||
954 | /* | ||
955 | * ext4_clear_inode() depends on us taking i_es_lock unconditionally | ||
956 | * so that we are sure __es_shrink() is done with the inode before it | ||
957 | * is reclaimed. | ||
958 | */ | ||
917 | write_lock(&EXT4_I(inode)->i_es_lock); | 959 | write_lock(&EXT4_I(inode)->i_es_lock); |
918 | err = __es_remove_extent(inode, lblk, end); | 960 | err = __es_remove_extent(inode, lblk, end); |
919 | write_unlock(&EXT4_I(inode)->i_es_lock); | 961 | write_unlock(&EXT4_I(inode)->i_es_lock); |
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
921 | return err; | 963 | return err; |
922 | } | 964 | } |
923 | 965 | ||
924 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 966 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
925 | struct list_head *b) | 967 | struct ext4_inode_info *locked_ei) |
926 | { | ||
927 | struct ext4_inode_info *eia, *eib; | ||
928 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
929 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
930 | |||
931 | if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
932 | !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
933 | return 1; | ||
934 | if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
935 | ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
936 | return -1; | ||
937 | if (eia->i_touch_when == eib->i_touch_when) | ||
938 | return 0; | ||
939 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
940 | return 1; | ||
941 | else | ||
942 | return -1; | ||
943 | } | ||
944 | |||
945 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
946 | struct ext4_inode_info *locked_ei) | ||
947 | { | 968 | { |
948 | struct ext4_inode_info *ei; | 969 | struct ext4_inode_info *ei; |
949 | struct ext4_es_stats *es_stats; | 970 | struct ext4_es_stats *es_stats; |
950 | struct list_head *cur, *tmp; | ||
951 | LIST_HEAD(skipped); | ||
952 | ktime_t start_time; | 971 | ktime_t start_time; |
953 | u64 scan_time; | 972 | u64 scan_time; |
973 | int nr_to_walk; | ||
954 | int nr_shrunk = 0; | 974 | int nr_shrunk = 0; |
955 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 975 | int retried = 0, nr_skipped = 0; |
956 | 976 | ||
957 | es_stats = &sbi->s_es_stats; | 977 | es_stats = &sbi->s_es_stats; |
958 | start_time = ktime_get(); | 978 | start_time = ktime_get(); |
959 | spin_lock(&sbi->s_es_lru_lock); | ||
960 | 979 | ||
961 | retry: | 980 | retry: |
962 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 981 | spin_lock(&sbi->s_es_lock); |
963 | int shrunk; | 982 | nr_to_walk = sbi->s_es_nr_inode; |
964 | 983 | while (nr_to_walk-- > 0) { | |
965 | /* | 984 | if (list_empty(&sbi->s_es_list)) { |
966 | * If we have already reclaimed all extents from extent | 985 | spin_unlock(&sbi->s_es_lock); |
967 | * status tree, just stop the loop immediately. | 986 | goto out; |
968 | */ | 987 | } |
969 | if (percpu_counter_read_positive( | 988 | ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, |
970 | &es_stats->es_stats_lru_cnt) == 0) | 989 | i_es_list); |
971 | break; | 990 | /* Move the inode to the tail */ |
972 | 991 | list_move_tail(&ei->i_es_list, &sbi->s_es_list); | |
973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
974 | 992 | ||
975 | /* | 993 | /* |
976 | * Skip the inode that is newer than the last_sorted | 994 | * Normally we try hard to avoid shrinking precached inodes, |
977 | * time. Normally we try hard to avoid shrinking | 995 | * but we will as a last resort. |
978 | * precached inodes, but we will as a last resort. | ||
979 | */ | 996 | */ |
980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || | 997 | if (!retried && ext4_test_inode_state(&ei->vfs_inode, |
981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 998 | EXT4_STATE_EXT_PRECACHED)) { |
982 | EXT4_STATE_EXT_PRECACHED))) { | ||
983 | nr_skipped++; | 999 | nr_skipped++; |
984 | list_move_tail(cur, &skipped); | ||
985 | continue; | 1000 | continue; |
986 | } | 1001 | } |
987 | 1002 | ||
988 | if (ei->i_es_lru_nr == 0 || ei == locked_ei || | 1003 | if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { |
989 | !write_trylock(&ei->i_es_lock)) | 1004 | nr_skipped++; |
990 | continue; | 1005 | continue; |
1006 | } | ||
1007 | /* | ||
1008 | * Now we hold i_es_lock which protects us from inode reclaim | ||
1009 | * freeing inode under us | ||
1010 | */ | ||
1011 | spin_unlock(&sbi->s_es_lock); | ||
991 | 1012 | ||
992 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 1013 | nr_shrunk += es_reclaim_extents(ei, &nr_to_scan); |
993 | if (ei->i_es_lru_nr == 0) | ||
994 | list_del_init(&ei->i_es_lru); | ||
995 | write_unlock(&ei->i_es_lock); | 1014 | write_unlock(&ei->i_es_lock); |
996 | 1015 | ||
997 | nr_shrunk += shrunk; | 1016 | if (nr_to_scan <= 0) |
998 | nr_to_scan -= shrunk; | 1017 | goto out; |
999 | if (nr_to_scan == 0) | 1018 | spin_lock(&sbi->s_es_lock); |
1000 | break; | ||
1001 | } | 1019 | } |
1002 | 1020 | spin_unlock(&sbi->s_es_lock); | |
1003 | /* Move the newer inodes into the tail of the LRU list. */ | ||
1004 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
1005 | INIT_LIST_HEAD(&skipped); | ||
1006 | 1021 | ||
1007 | /* | 1022 | /* |
1008 | * If we skipped any inodes, and we weren't able to make any | 1023 | * If we skipped any inodes, and we weren't able to make any |
1009 | * forward progress, sort the list and try again. | 1024 | * forward progress, try again to scan precached inodes. |
1010 | */ | 1025 | */ |
1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1026 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
1012 | retried++; | 1027 | retried++; |
1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
1014 | es_stats->es_stats_last_sorted = jiffies; | ||
1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | ||
1016 | i_es_lru); | ||
1017 | /* | ||
1018 | * If there are no non-precached inodes left on the | ||
1019 | * list, start releasing precached extents. | ||
1020 | */ | ||
1021 | if (ext4_test_inode_state(&ei->vfs_inode, | ||
1022 | EXT4_STATE_EXT_PRECACHED)) | ||
1023 | skip_precached = 0; | ||
1024 | goto retry; | 1028 | goto retry; |
1025 | } | 1029 | } |
1026 | 1030 | ||
1027 | spin_unlock(&sbi->s_es_lru_lock); | ||
1028 | |||
1029 | if (locked_ei && nr_shrunk == 0) | 1031 | if (locked_ei && nr_shrunk == 0) |
1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1032 | nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan); |
1031 | 1033 | ||
1034 | out: | ||
1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | 1035 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
1033 | if (likely(es_stats->es_stats_scan_time)) | 1036 | if (likely(es_stats->es_stats_scan_time)) |
1034 | es_stats->es_stats_scan_time = (scan_time + | 1037 | es_stats->es_stats_scan_time = (scan_time + |
@@ -1043,7 +1046,7 @@ retry: | |||
1043 | else | 1046 | else |
1044 | es_stats->es_stats_shrunk = nr_shrunk; | 1047 | es_stats->es_stats_shrunk = nr_shrunk; |
1045 | 1048 | ||
1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | 1049 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, |
1047 | nr_skipped, retried); | 1050 | nr_skipped, retried); |
1048 | return nr_shrunk; | 1051 | return nr_shrunk; |
1049 | } | 1052 | } |
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
1055 | struct ext4_sb_info *sbi; | 1058 | struct ext4_sb_info *sbi; |
1056 | 1059 | ||
1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1060 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1061 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); | 1062 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
1060 | return nr; | 1063 | return nr; |
1061 | } | 1064 | } |
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
1068 | int nr_to_scan = sc->nr_to_scan; | 1071 | int nr_to_scan = sc->nr_to_scan; |
1069 | int ret, nr_shrunk; | 1072 | int ret, nr_shrunk; |
1070 | 1073 | ||
1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1074 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); | 1075 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
1073 | 1076 | ||
1074 | if (!nr_to_scan) | 1077 | if (!nr_to_scan) |
1075 | return ret; | 1078 | return ret; |
1076 | 1079 | ||
1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1080 | nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); |
1078 | 1081 | ||
1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); | 1082 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
1080 | return nr_shrunk; | 1083 | return nr_shrunk; |
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1102 | return 0; | 1105 | return 0; |
1103 | 1106 | ||
1104 | /* here we just find an inode that has the max nr. of objects */ | 1107 | /* here we just find an inode that has the max nr. of objects */ |
1105 | spin_lock(&sbi->s_es_lru_lock); | 1108 | spin_lock(&sbi->s_es_lock); |
1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | 1109 | list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { |
1107 | inode_cnt++; | 1110 | inode_cnt++; |
1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | 1111 | if (max && max->i_es_all_nr < ei->i_es_all_nr) |
1109 | max = ei; | 1112 | max = ei; |
1110 | else if (!max) | 1113 | else if (!max) |
1111 | max = ei; | 1114 | max = ei; |
1112 | } | 1115 | } |
1113 | spin_unlock(&sbi->s_es_lru_lock); | 1116 | spin_unlock(&sbi->s_es_lock); |
1114 | 1117 | ||
1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | 1118 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", |
1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | 1119 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), |
1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | 1120 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); |
1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | 1121 | seq_printf(seq, " %lu/%lu cache hits/misses\n", |
1119 | es_stats->es_stats_cache_hits, | 1122 | es_stats->es_stats_cache_hits, |
1120 | es_stats->es_stats_cache_misses); | 1123 | es_stats->es_stats_cache_misses); |
1121 | if (es_stats->es_stats_last_sorted != 0) | ||
1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
1123 | jiffies_to_msecs(jiffies - | ||
1124 | es_stats->es_stats_last_sorted)); | ||
1125 | if (inode_cnt) | 1124 | if (inode_cnt) |
1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | 1125 | seq_printf(seq, " %d inodes on list\n", inode_cnt); |
1127 | 1126 | ||
1128 | seq_printf(seq, "average:\n %llu us scan time\n", | 1127 | seq_printf(seq, "average:\n %llu us scan time\n", |
1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | 1128 | div_u64(es_stats->es_stats_scan_time, 1000)); |
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1132 | seq_printf(seq, | 1131 | seq_printf(seq, |
1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | 1132 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" |
1134 | " %llu us max scan time\n", | 1133 | " %llu us max scan time\n", |
1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | 1134 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, |
1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | 1135 | div_u64(es_stats->es_stats_max_scan_time, 1000)); |
1137 | 1136 | ||
1138 | return 0; | 1137 | return 0; |
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1181 | { | 1180 | { |
1182 | int err; | 1181 | int err; |
1183 | 1182 | ||
1184 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1183 | /* Make sure we have enough bits for physical block number */ |
1185 | spin_lock_init(&sbi->s_es_lru_lock); | 1184 | BUILD_BUG_ON(ES_SHIFT < 48); |
1186 | sbi->s_es_stats.es_stats_last_sorted = 0; | 1185 | INIT_LIST_HEAD(&sbi->s_es_list); |
1186 | sbi->s_es_nr_inode = 0; | ||
1187 | spin_lock_init(&sbi->s_es_lock); | ||
1187 | sbi->s_es_stats.es_stats_shrunk = 0; | 1188 | sbi->s_es_stats.es_stats_shrunk = 0; |
1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | 1189 | sbi->s_es_stats.es_stats_cache_hits = 0; |
1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | 1190 | sbi->s_es_stats.es_stats_cache_misses = 0; |
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | 1193 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); |
1193 | if (err) | 1194 | if (err) |
1194 | return err; | 1195 | return err; |
1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | 1196 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); |
1196 | if (err) | 1197 | if (err) |
1197 | goto err1; | 1198 | goto err1; |
1198 | 1199 | ||
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1210 | return 0; | 1211 | return 0; |
1211 | 1212 | ||
1212 | err2: | 1213 | err2: |
1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1214 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1214 | err1: | 1215 | err1: |
1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1216 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1216 | return err; | 1217 | return err; |
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | |||
1221 | if (sbi->s_proc) | 1222 | if (sbi->s_proc) |
1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | 1223 | remove_proc_entry("es_shrinker_info", sbi->s_proc); |
1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1225 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1225 | unregister_shrinker(&sbi->s_es_shrinker); | 1226 | unregister_shrinker(&sbi->s_es_shrinker); |
1226 | } | 1227 | } |
1227 | 1228 | ||
1228 | void ext4_es_lru_add(struct inode *inode) | 1229 | /* |
1230 | * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at | ||
1231 | * most *nr_to_scan extents, update *nr_to_scan accordingly. | ||
1232 | * | ||
1233 | * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan. | ||
1234 | * Increment *nr_shrunk by the number of reclaimed extents. Also update | ||
1235 | * ei->i_es_shrink_lblk to where we should continue scanning. | ||
1236 | */ | ||
1237 | static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, | ||
1238 | int *nr_to_scan, int *nr_shrunk) | ||
1229 | { | 1239 | { |
1230 | struct ext4_inode_info *ei = EXT4_I(inode); | 1240 | struct inode *inode = &ei->vfs_inode; |
1231 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1241 | struct ext4_es_tree *tree = &ei->i_es_tree; |
1232 | 1242 | struct extent_status *es; | |
1233 | ei->i_touch_when = jiffies; | 1243 | struct rb_node *node; |
1234 | |||
1235 | if (!list_empty(&ei->i_es_lru)) | ||
1236 | return; | ||
1237 | 1244 | ||
1238 | spin_lock(&sbi->s_es_lru_lock); | 1245 | es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); |
1239 | if (list_empty(&ei->i_es_lru)) | 1246 | if (!es) |
1240 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 1247 | goto out_wrap; |
1241 | spin_unlock(&sbi->s_es_lru_lock); | 1248 | node = &es->rb_node; |
1242 | } | 1249 | while (*nr_to_scan > 0) { |
1250 | if (es->es_lblk > end) { | ||
1251 | ei->i_es_shrink_lblk = end + 1; | ||
1252 | return 0; | ||
1253 | } | ||
1243 | 1254 | ||
1244 | void ext4_es_lru_del(struct inode *inode) | 1255 | (*nr_to_scan)--; |
1245 | { | 1256 | node = rb_next(&es->rb_node); |
1246 | struct ext4_inode_info *ei = EXT4_I(inode); | 1257 | /* |
1247 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1258 | * We can't reclaim delayed extent from status tree because |
1259 | * fiemap, bigalloc, and seek_data/hole need to use it. | ||
1260 | */ | ||
1261 | if (ext4_es_is_delayed(es)) | ||
1262 | goto next; | ||
1263 | if (ext4_es_is_referenced(es)) { | ||
1264 | ext4_es_clear_referenced(es); | ||
1265 | goto next; | ||
1266 | } | ||
1248 | 1267 | ||
1249 | spin_lock(&sbi->s_es_lru_lock); | 1268 | rb_erase(&es->rb_node, &tree->root); |
1250 | if (!list_empty(&ei->i_es_lru)) | 1269 | ext4_es_free_extent(inode, es); |
1251 | list_del_init(&ei->i_es_lru); | 1270 | (*nr_shrunk)++; |
1252 | spin_unlock(&sbi->s_es_lru_lock); | 1271 | next: |
1272 | if (!node) | ||
1273 | goto out_wrap; | ||
1274 | es = rb_entry(node, struct extent_status, rb_node); | ||
1275 | } | ||
1276 | ei->i_es_shrink_lblk = es->es_lblk; | ||
1277 | return 1; | ||
1278 | out_wrap: | ||
1279 | ei->i_es_shrink_lblk = 0; | ||
1280 | return 0; | ||
1253 | } | 1281 | } |
1254 | 1282 | ||
1255 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 1283 | static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan) |
1256 | int nr_to_scan) | ||
1257 | { | 1284 | { |
1258 | struct inode *inode = &ei->vfs_inode; | 1285 | struct inode *inode = &ei->vfs_inode; |
1259 | struct ext4_es_tree *tree = &ei->i_es_tree; | 1286 | int nr_shrunk = 0; |
1260 | struct rb_node *node; | 1287 | ext4_lblk_t start = ei->i_es_shrink_lblk; |
1261 | struct extent_status *es; | ||
1262 | unsigned long nr_shrunk = 0; | ||
1263 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1288 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
1264 | DEFAULT_RATELIMIT_BURST); | 1289 | DEFAULT_RATELIMIT_BURST); |
1265 | 1290 | ||
1266 | if (ei->i_es_lru_nr == 0) | 1291 | if (ei->i_es_shk_nr == 0) |
1267 | return 0; | 1292 | return 0; |
1268 | 1293 | ||
1269 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | 1294 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && |
1270 | __ratelimit(&_rs)) | 1295 | __ratelimit(&_rs)) |
1271 | ext4_warning(inode->i_sb, "forced shrink of precached extents"); | 1296 | ext4_warning(inode->i_sb, "forced shrink of precached extents"); |
1272 | 1297 | ||
1273 | node = rb_first(&tree->root); | 1298 | if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) && |
1274 | while (node != NULL) { | 1299 | start != 0) |
1275 | es = rb_entry(node, struct extent_status, rb_node); | 1300 | es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk); |
1276 | node = rb_next(&es->rb_node); | 1301 | |
1277 | /* | 1302 | ei->i_es_tree.cache_es = NULL; |
1278 | * We can't reclaim delayed extent from status tree because | ||
1279 | * fiemap, bigalloc, and seek_data/hole need to use it. | ||
1280 | */ | ||
1281 | if (!ext4_es_is_delayed(es)) { | ||
1282 | rb_erase(&es->rb_node, &tree->root); | ||
1283 | ext4_es_free_extent(inode, es); | ||
1284 | nr_shrunk++; | ||
1285 | if (--nr_to_scan == 0) | ||
1286 | break; | ||
1287 | } | ||
1288 | } | ||
1289 | tree->cache_es = NULL; | ||
1290 | return nr_shrunk; | 1303 | return nr_shrunk; |
1291 | } | 1304 | } |
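The reclaim rework above drops the sorted LRU: inodes now rotate on a plain s_es_list, and each non-delayed extent carries a referenced bit that ext4_es_lookup_extent() sets and the first shrinker pass merely clears, so only extents untouched since the previous pass are freed. That is essentially a second-chance (clock) policy. The sketch below illustrates that policy on its own in user-space C; every name in it is invented for the illustration and none of it is ext4 code.

/*
 * Standalone illustration (not kernel code): second-chance reclaim over
 * a flat cache, mirroring how the scan above skips an entry whose
 * referenced bit is set and clears the bit instead, so anything used
 * since the previous pass survives exactly one more pass.
 */
#include <stdbool.h>
#include <stdio.h>

struct entry {
	bool referenced;	/* set on lookup, cleared by the scan */
	bool present;		/* still cached */
};

/* Scan from *cursor, wrapping around, until *nr_to_scan is used up. */
static int reclaim(struct entry *tab, int n, int *cursor, int *nr_to_scan)
{
	int shrunk = 0, looked = 0;

	while (*nr_to_scan > 0 && looked < 2 * n) {
		struct entry *e = &tab[*cursor];

		*cursor = (*cursor + 1) % n;
		looked++;
		if (!e->present)
			continue;
		(*nr_to_scan)--;
		if (e->referenced) {
			e->referenced = false;	/* second chance */
			continue;
		}
		e->present = false;		/* reclaim */
		shrunk++;
	}
	return shrunk;
}

int main(void)
{
	struct entry tab[4] = {
		{ true, true }, { false, true }, { true, true }, { false, true },
	};
	int cursor = 0, nr_to_scan = 8;

	printf("reclaimed %d entries\n", reclaim(tab, 4, &cursor, &nr_to_scan));
	return 0;
}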
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index efd5f970b501..691b52613ce4 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -29,25 +29,28 @@ | |||
29 | /* | 29 | /* |
30 | * These flags live in the high bits of extent_status.es_pblk | 30 | * These flags live in the high bits of extent_status.es_pblk |
31 | */ | 31 | */ |
32 | #define ES_SHIFT 60 | 32 | enum { |
33 | 33 | ES_WRITTEN_B, | |
34 | #define EXTENT_STATUS_WRITTEN (1 << 3) | 34 | ES_UNWRITTEN_B, |
35 | #define EXTENT_STATUS_UNWRITTEN (1 << 2) | 35 | ES_DELAYED_B, |
36 | #define EXTENT_STATUS_DELAYED (1 << 1) | 36 | ES_HOLE_B, |
37 | #define EXTENT_STATUS_HOLE (1 << 0) | 37 | ES_REFERENCED_B, |
38 | ES_FLAGS | ||
39 | }; | ||
38 | 40 | ||
39 | #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ | 41 | #define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS) |
40 | EXTENT_STATUS_UNWRITTEN | \ | 42 | #define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT) |
41 | EXTENT_STATUS_DELAYED | \ | ||
42 | EXTENT_STATUS_HOLE) | ||
43 | 43 | ||
44 | #define ES_WRITTEN (1ULL << 63) | 44 | #define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B) |
45 | #define ES_UNWRITTEN (1ULL << 62) | 45 | #define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B) |
46 | #define ES_DELAYED (1ULL << 61) | 46 | #define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B) |
47 | #define ES_HOLE (1ULL << 60) | 47 | #define EXTENT_STATUS_HOLE (1 << ES_HOLE_B) |
48 | #define EXTENT_STATUS_REFERENCED (1 << ES_REFERENCED_B) | ||
48 | 49 | ||
49 | #define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \ | 50 | #define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \ |
50 | ES_DELAYED | ES_HOLE) | 51 | EXTENT_STATUS_UNWRITTEN | \ |
52 | EXTENT_STATUS_DELAYED | \ | ||
53 | EXTENT_STATUS_HOLE) << ES_SHIFT) | ||
51 | 54 | ||
52 | struct ext4_sb_info; | 55 | struct ext4_sb_info; |
53 | struct ext4_extent; | 56 | struct ext4_extent; |
@@ -65,14 +68,13 @@ struct ext4_es_tree { | |||
65 | }; | 68 | }; |
66 | 69 | ||
67 | struct ext4_es_stats { | 70 | struct ext4_es_stats { |
68 | unsigned long es_stats_last_sorted; | ||
69 | unsigned long es_stats_shrunk; | 71 | unsigned long es_stats_shrunk; |
70 | unsigned long es_stats_cache_hits; | 72 | unsigned long es_stats_cache_hits; |
71 | unsigned long es_stats_cache_misses; | 73 | unsigned long es_stats_cache_misses; |
72 | u64 es_stats_scan_time; | 74 | u64 es_stats_scan_time; |
73 | u64 es_stats_max_scan_time; | 75 | u64 es_stats_max_scan_time; |
74 | struct percpu_counter es_stats_all_cnt; | 76 | struct percpu_counter es_stats_all_cnt; |
75 | struct percpu_counter es_stats_lru_cnt; | 77 | struct percpu_counter es_stats_shk_cnt; |
76 | }; | 78 | }; |
77 | 79 | ||
78 | extern int __init ext4_init_es(void); | 80 | extern int __init ext4_init_es(void); |
@@ -93,29 +95,49 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode, | |||
93 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 95 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
94 | struct extent_status *es); | 96 | struct extent_status *es); |
95 | 97 | ||
98 | static inline unsigned int ext4_es_status(struct extent_status *es) | ||
99 | { | ||
100 | return es->es_pblk >> ES_SHIFT; | ||
101 | } | ||
102 | |||
103 | static inline unsigned int ext4_es_type(struct extent_status *es) | ||
104 | { | ||
105 | return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT; | ||
106 | } | ||
107 | |||
96 | static inline int ext4_es_is_written(struct extent_status *es) | 108 | static inline int ext4_es_is_written(struct extent_status *es) |
97 | { | 109 | { |
98 | return (es->es_pblk & ES_WRITTEN) != 0; | 110 | return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0; |
99 | } | 111 | } |
100 | 112 | ||
101 | static inline int ext4_es_is_unwritten(struct extent_status *es) | 113 | static inline int ext4_es_is_unwritten(struct extent_status *es) |
102 | { | 114 | { |
103 | return (es->es_pblk & ES_UNWRITTEN) != 0; | 115 | return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0; |
104 | } | 116 | } |
105 | 117 | ||
106 | static inline int ext4_es_is_delayed(struct extent_status *es) | 118 | static inline int ext4_es_is_delayed(struct extent_status *es) |
107 | { | 119 | { |
108 | return (es->es_pblk & ES_DELAYED) != 0; | 120 | return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0; |
109 | } | 121 | } |
110 | 122 | ||
111 | static inline int ext4_es_is_hole(struct extent_status *es) | 123 | static inline int ext4_es_is_hole(struct extent_status *es) |
112 | { | 124 | { |
113 | return (es->es_pblk & ES_HOLE) != 0; | 125 | return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0; |
114 | } | 126 | } |
115 | 127 | ||
116 | static inline unsigned int ext4_es_status(struct extent_status *es) | 128 | static inline void ext4_es_set_referenced(struct extent_status *es) |
117 | { | 129 | { |
118 | return es->es_pblk >> ES_SHIFT; | 130 | es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT; |
131 | } | ||
132 | |||
133 | static inline void ext4_es_clear_referenced(struct extent_status *es) | ||
134 | { | ||
135 | es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT); | ||
136 | } | ||
137 | |||
138 | static inline int ext4_es_is_referenced(struct extent_status *es) | ||
139 | { | ||
140 | return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0; | ||
119 | } | 141 | } |
120 | 142 | ||
121 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) | 143 | static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) |
@@ -135,23 +157,19 @@ static inline void ext4_es_store_pblock(struct extent_status *es, | |||
135 | static inline void ext4_es_store_status(struct extent_status *es, | 157 | static inline void ext4_es_store_status(struct extent_status *es, |
136 | unsigned int status) | 158 | unsigned int status) |
137 | { | 159 | { |
138 | es->es_pblk = (((ext4_fsblk_t) | 160 | es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | |
139 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | 161 | (es->es_pblk & ~ES_MASK); |
140 | (es->es_pblk & ~ES_MASK)); | ||
141 | } | 162 | } |
142 | 163 | ||
143 | static inline void ext4_es_store_pblock_status(struct extent_status *es, | 164 | static inline void ext4_es_store_pblock_status(struct extent_status *es, |
144 | ext4_fsblk_t pb, | 165 | ext4_fsblk_t pb, |
145 | unsigned int status) | 166 | unsigned int status) |
146 | { | 167 | { |
147 | es->es_pblk = (((ext4_fsblk_t) | 168 | es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | |
148 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | 169 | (pb & ~ES_MASK); |
149 | (pb & ~ES_MASK)); | ||
150 | } | 170 | } |
151 | 171 | ||
152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 172 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 173 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
154 | extern void ext4_es_lru_add(struct inode *inode); | ||
155 | extern void ext4_es_lru_del(struct inode *inode); | ||
156 | 174 | ||
157 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 175 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
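For context on the header changes just above: the extent-status flags live in the top bits of es_pblk, and with the new ES_REFERENCED_B bit there are five flag bits, so ES_SHIFT works out to 59 for a 64-bit ext4_fsblk_t and the physical block number keeps the low 59 bits. A small user-space sketch of the same packing arithmetic follows; the types and names are local to the example, not the kernel's.

/*
 * User-space sketch of the es_pblk packing shown in the header above:
 * five flag bits (FLAGS == 5) on a 64-bit block type give SHIFT == 59,
 * so the flags sit in bits 59..63 and the block number in bits 0..58.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t fsblk_t;

enum { WRITTEN_B, UNWRITTEN_B, DELAYED_B, HOLE_B, REFERENCED_B, FLAGS };

#define SHIFT	(sizeof(fsblk_t) * 8 - FLAGS)	/* 59 */
#define MASK	(~(fsblk_t)0 << SHIFT)

/* Mirror of the "store status, keep pblk" update done in the header. */
static fsblk_t store(fsblk_t pblk, unsigned int status)
{
	return (((fsblk_t)status << SHIFT) & MASK) | (pblk & ~MASK);
}

int main(void)
{
	fsblk_t v = store(123456, (1U << WRITTEN_B) | (1U << REFERENCED_B));

	printf("pblk=%llu status=0x%llx\n",
	       (unsigned long long)(v & ~MASK),
	       (unsigned long long)(v >> SHIFT));
	return 0;
}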
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8131be8c0af3..513c12cf444c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
273 | * we determine this extent as a data or a hole according to whether the | 273 | * we determine this extent as a data or a hole according to whether the |
274 | * page cache has data or not. | 274 | * page cache has data or not. |
275 | */ | 275 | */ |
276 | static int ext4_find_unwritten_pgoff(struct inode *inode, | 276 | static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, |
277 | int whence, | 277 | loff_t endoff, loff_t *offset) |
278 | struct ext4_map_blocks *map, | ||
279 | loff_t *offset) | ||
280 | { | 278 | { |
281 | struct pagevec pvec; | 279 | struct pagevec pvec; |
282 | unsigned int blkbits; | ||
283 | pgoff_t index; | 280 | pgoff_t index; |
284 | pgoff_t end; | 281 | pgoff_t end; |
285 | loff_t endoff; | ||
286 | loff_t startoff; | 282 | loff_t startoff; |
287 | loff_t lastoff; | 283 | loff_t lastoff; |
288 | int found = 0; | 284 | int found = 0; |
289 | 285 | ||
290 | blkbits = inode->i_sb->s_blocksize_bits; | ||
291 | startoff = *offset; | 286 | startoff = *offset; |
292 | lastoff = startoff; | 287 | lastoff = startoff; |
293 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; | 288 | |
294 | 289 | ||
295 | index = startoff >> PAGE_CACHE_SHIFT; | 290 | index = startoff >> PAGE_CACHE_SHIFT; |
296 | end = endoff >> PAGE_CACHE_SHIFT; | 291 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -408,147 +403,144 @@ out: | |||
408 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | 403 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) |
409 | { | 404 | { |
410 | struct inode *inode = file->f_mapping->host; | 405 | struct inode *inode = file->f_mapping->host; |
411 | struct ext4_map_blocks map; | 406 | struct fiemap_extent_info fie; |
412 | struct extent_status es; | 407 | struct fiemap_extent ext[2]; |
413 | ext4_lblk_t start, last, end; | 408 | loff_t next; |
414 | loff_t dataoff, isize; | 409 | int i, ret = 0; |
415 | int blkbits; | ||
416 | int ret = 0; | ||
417 | 410 | ||
418 | mutex_lock(&inode->i_mutex); | 411 | mutex_lock(&inode->i_mutex); |
419 | 412 | if (offset >= inode->i_size) { | |
420 | isize = i_size_read(inode); | ||
421 | if (offset >= isize) { | ||
422 | mutex_unlock(&inode->i_mutex); | 413 | mutex_unlock(&inode->i_mutex); |
423 | return -ENXIO; | 414 | return -ENXIO; |
424 | } | 415 | } |
425 | 416 | fie.fi_flags = 0; | |
426 | blkbits = inode->i_sb->s_blocksize_bits; | 417 | fie.fi_extents_max = 2; |
427 | start = offset >> blkbits; | 418 | fie.fi_extents_start = (struct fiemap_extent __user *) &ext; |
428 | last = start; | 419 | while (1) { |
429 | end = isize >> blkbits; | 420 | mm_segment_t old_fs = get_fs(); |
430 | dataoff = offset; | 421 | |
431 | 422 | fie.fi_extents_mapped = 0; | |
432 | do { | 423 | memset(ext, 0, sizeof(*ext) * fie.fi_extents_max); |
433 | map.m_lblk = last; | 424 | |
434 | map.m_len = end - last + 1; | 425 | set_fs(get_ds()); |
435 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 426 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); |
436 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 427 | set_fs(old_fs); |
437 | if (last != start) | 428 | if (ret) |
438 | dataoff = (loff_t)last << blkbits; | ||
439 | break; | 429 | break; |
440 | } | ||
441 | 430 | ||
442 | /* | 431 | /* No extents found, EOF */ |
443 | * If there is a delay extent at this offset, | 432 | if (!fie.fi_extents_mapped) { |
444 | * it will be as a data. | 433 | ret = -ENXIO; |
445 | */ | ||
446 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
447 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
448 | if (last != start) | ||
449 | dataoff = (loff_t)last << blkbits; | ||
450 | break; | 434 | break; |
451 | } | 435 | } |
436 | for (i = 0; i < fie.fi_extents_mapped; i++) { | ||
437 | next = (loff_t)(ext[i].fe_length + ext[i].fe_logical); | ||
452 | 438 | ||
453 | /* | 439 | if (offset < (loff_t)ext[i].fe_logical) |
454 | * If there is an unwritten extent at this offset, | 440 | offset = (loff_t)ext[i].fe_logical; |
455 | * it will be as a data or a hole according to page | 441 | /* |
456 | * cache that has data or not. | 442 | * If extent is not unwritten, then it contains valid |
457 | */ | 443 | * data, mapped or delayed. |
458 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 444 | */ |
459 | int unwritten; | 445 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) |
460 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, | 446 | goto out; |
461 | &map, &dataoff); | ||
462 | if (unwritten) | ||
463 | break; | ||
464 | } | ||
465 | 447 | ||
466 | last++; | 448 | /* |
467 | dataoff = (loff_t)last << blkbits; | 449 | * If there is an unwritten extent at this offset, |
468 | } while (last <= end); | 450 | * it will be as a data or a hole according to page |
451 | * cache that has data or not. | ||
452 | */ | ||
453 | if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, | ||
454 | next, &offset)) | ||
455 | goto out; | ||
469 | 456 | ||
457 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) { | ||
458 | ret = -ENXIO; | ||
459 | goto out; | ||
460 | } | ||
461 | offset = next; | ||
462 | } | ||
463 | } | ||
464 | if (offset > inode->i_size) | ||
465 | offset = inode->i_size; | ||
466 | out: | ||
470 | mutex_unlock(&inode->i_mutex); | 467 | mutex_unlock(&inode->i_mutex); |
468 | if (ret) | ||
469 | return ret; | ||
471 | 470 | ||
472 | if (dataoff > isize) | 471 | return vfs_setpos(file, offset, maxsize); |
473 | return -ENXIO; | ||
474 | |||
475 | return vfs_setpos(file, dataoff, maxsize); | ||
476 | } | 472 | } |
477 | 473 | ||
478 | /* | 474 | /* |
479 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE. | 475 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE |
480 | */ | 476 | */ |
481 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | 477 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) |
482 | { | 478 | { |
483 | struct inode *inode = file->f_mapping->host; | 479 | struct inode *inode = file->f_mapping->host; |
484 | struct ext4_map_blocks map; | 480 | struct fiemap_extent_info fie; |
485 | struct extent_status es; | 481 | struct fiemap_extent ext[2]; |
486 | ext4_lblk_t start, last, end; | 482 | loff_t next; |
487 | loff_t holeoff, isize; | 483 | int i, ret = 0; |
488 | int blkbits; | ||
489 | int ret = 0; | ||
490 | 484 | ||
491 | mutex_lock(&inode->i_mutex); | 485 | mutex_lock(&inode->i_mutex); |
492 | 486 | if (offset >= inode->i_size) { | |
493 | isize = i_size_read(inode); | ||
494 | if (offset >= isize) { | ||
495 | mutex_unlock(&inode->i_mutex); | 487 | mutex_unlock(&inode->i_mutex); |
496 | return -ENXIO; | 488 | return -ENXIO; |
497 | } | 489 | } |
498 | 490 | ||
499 | blkbits = inode->i_sb->s_blocksize_bits; | 491 | fie.fi_flags = 0; |
500 | start = offset >> blkbits; | 492 | fie.fi_extents_max = 2; |
501 | last = start; | 493 | fie.fi_extents_start = (struct fiemap_extent __user *)&ext; |
502 | end = isize >> blkbits; | 494 | while (1) { |
503 | holeoff = offset; | 495 | mm_segment_t old_fs = get_fs(); |
504 | 496 | ||
505 | do { | 497 | fie.fi_extents_mapped = 0; |
506 | map.m_lblk = last; | 498 | memset(ext, 0, sizeof(*ext)); |
507 | map.m_len = end - last + 1; | ||
508 | ret = ext4_map_blocks(NULL, inode, &map, 0); | ||
509 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | ||
510 | last += ret; | ||
511 | holeoff = (loff_t)last << blkbits; | ||
512 | continue; | ||
513 | } | ||
514 | 499 | ||
515 | /* | 500 | set_fs(get_ds()); |
516 | * If there is a delay extent at this offset, | 501 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); |
517 | * we will skip this extent. | 502 | set_fs(old_fs); |
518 | */ | 503 | if (ret) |
519 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 504 | break; |
520 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
521 | last = es.es_lblk + es.es_len; | ||
522 | holeoff = (loff_t)last << blkbits; | ||
523 | continue; | ||
524 | } | ||
525 | 505 | ||
526 | /* | 506 | /* No extents found */ |
527 | * If there is an unwritten extent at this offset, | 507 | if (!fie.fi_extents_mapped) |
528 | * it will be as a data or a hole according to page | 508 | break; |
529 | * cache that has data or not. | 509 | |
530 | */ | 510 | for (i = 0; i < fie.fi_extents_mapped; i++) { |
531 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 511 | next = (loff_t)(ext[i].fe_logical + ext[i].fe_length); |
532 | int unwritten; | 512 | /* |
533 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | 513 | * If extent is not unwritten, then it contains valid |
534 | &map, &holeoff); | 514 | * data, mapped or delayed. |
535 | if (!unwritten) { | 515 | */ |
536 | last += ret; | 516 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) { |
537 | holeoff = (loff_t)last << blkbits; | 517 | if (offset < (loff_t)ext[i].fe_logical) |
518 | goto out; | ||
519 | offset = next; | ||
538 | continue; | 520 | continue; |
539 | } | 521 | } |
540 | } | 522 | /* |
541 | 523 | * If there is an unwritten extent at this offset, | |
542 | /* find a hole */ | 524 | * it will be as a data or a hole according to page |
543 | break; | 525 | * cache that has data or not. |
544 | } while (last <= end); | 526 | */ |
527 | if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | ||
528 | next, &offset)) | ||
529 | goto out; | ||
545 | 530 | ||
531 | offset = next; | ||
532 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) | ||
533 | goto out; | ||
534 | } | ||
535 | } | ||
536 | if (offset > inode->i_size) | ||
537 | offset = inode->i_size; | ||
538 | out: | ||
546 | mutex_unlock(&inode->i_mutex); | 539 | mutex_unlock(&inode->i_mutex); |
540 | if (ret) | ||
541 | return ret; | ||
547 | 542 | ||
548 | if (holeoff > isize) | 543 | return vfs_setpos(file, offset, maxsize); |
549 | holeoff = isize; | ||
550 | |||
551 | return vfs_setpos(file, holeoff, maxsize); | ||
552 | } | 544 | } |
553 | 545 | ||
554 | /* | 546 | /* |
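The rewritten ext4_seek_data()/ext4_seek_hole() above walk the file with ext4_fiemap() under set_fs(get_ds()), using a small on-stack fiemap_extent array, instead of mapping block ranges by hand. What they implement is the usual lseek() SEEK_DATA/SEEK_HOLE contract; a minimal user-space sketch of that contract follows (the file name is hypothetical and error handling is trimmed).

/*
 * Minimal user-space view of the semantics implemented above: lseek()
 * with SEEK_DATA/SEEK_HOLE reports the same data/hole boundaries the
 * new seek helpers derive from the extent information.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("sparse.img", O_RDONLY);	/* hypothetical sparse file */
	off_t data, hole;

	if (fd < 0)
		return 1;
	data = lseek(fd, 0, SEEK_DATA);		/* first data at or after 0 */
	if (data < 0)				/* ENXIO: no data past offset */
		data = lseek(fd, 0, SEEK_END);
	hole = lseek(fd, data, SEEK_HOLE);	/* first hole at or after data */
	printf("data at %lld, next hole at %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}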
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3ea62695abce..4b143febf21f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -811,8 +811,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, | |||
811 | ret = __block_write_begin(page, 0, inline_size, | 811 | ret = __block_write_begin(page, 0, inline_size, |
812 | ext4_da_get_block_prep); | 812 | ext4_da_get_block_prep); |
813 | if (ret) { | 813 | if (ret) { |
814 | up_read(&EXT4_I(inode)->xattr_sem); | ||
815 | unlock_page(page); | ||
816 | page_cache_release(page); | ||
814 | ext4_truncate_failed_write(inode); | 817 | ext4_truncate_failed_write(inode); |
815 | goto out; | 818 | return ret; |
816 | } | 819 | } |
817 | 820 | ||
818 | SetPageDirty(page); | 821 | SetPageDirty(page); |
@@ -870,6 +873,12 @@ retry_journal: | |||
870 | goto out_journal; | 873 | goto out_journal; |
871 | } | 874 | } |
872 | 875 | ||
876 | /* | ||
877 | * We cannot recurse into the filesystem as the transaction | ||
878 | * is already started. | ||
879 | */ | ||
880 | flags |= AOP_FLAG_NOFS; | ||
881 | |||
873 | if (ret == -ENOSPC) { | 882 | if (ret == -ENOSPC) { |
874 | ret = ext4_da_convert_inline_data_to_extent(mapping, | 883 | ret = ext4_da_convert_inline_data_to_extent(mapping, |
875 | inode, | 884 | inode, |
@@ -882,11 +891,6 @@ retry_journal: | |||
882 | goto out; | 891 | goto out; |
883 | } | 892 | } |
884 | 893 | ||
885 | /* | ||
886 | * We cannot recurse into the filesystem as the transaction | ||
887 | * is already started. | ||
888 | */ | ||
889 | flags |= AOP_FLAG_NOFS; | ||
890 | 894 | ||
891 | page = grab_cache_page_write_begin(mapping, 0, flags); | 895 | page = grab_cache_page_write_begin(mapping, 0, flags); |
892 | if (!page) { | 896 | if (!page) { |
@@ -1807,11 +1811,12 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) | |||
1807 | 1811 | ||
1808 | int ext4_inline_data_fiemap(struct inode *inode, | 1812 | int ext4_inline_data_fiemap(struct inode *inode, |
1809 | struct fiemap_extent_info *fieinfo, | 1813 | struct fiemap_extent_info *fieinfo, |
1810 | int *has_inline) | 1814 | int *has_inline, __u64 start, __u64 len) |
1811 | { | 1815 | { |
1812 | __u64 physical = 0; | 1816 | __u64 physical = 0; |
1813 | __u64 length; | 1817 | __u64 inline_len; |
1814 | __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST; | 1818 | __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | |
1819 | FIEMAP_EXTENT_LAST; | ||
1815 | int error = 0; | 1820 | int error = 0; |
1816 | struct ext4_iloc iloc; | 1821 | struct ext4_iloc iloc; |
1817 | 1822 | ||
@@ -1820,6 +1825,13 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1820 | *has_inline = 0; | 1825 | *has_inline = 0; |
1821 | goto out; | 1826 | goto out; |
1822 | } | 1827 | } |
1828 | inline_len = min_t(size_t, ext4_get_inline_size(inode), | ||
1829 | i_size_read(inode)); | ||
1830 | if (start >= inline_len) | ||
1831 | goto out; | ||
1832 | if (start + len < inline_len) | ||
1833 | inline_len = start + len; | ||
1834 | inline_len -= start; | ||
1823 | 1835 | ||
1824 | error = ext4_get_inode_loc(inode, &iloc); | 1836 | error = ext4_get_inode_loc(inode, &iloc); |
1825 | if (error) | 1837 | if (error) |
@@ -1828,11 +1840,10 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1828 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1840 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
1829 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1841 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
1830 | physical += offsetof(struct ext4_inode, i_block); | 1842 | physical += offsetof(struct ext4_inode, i_block); |
1831 | length = i_size_read(inode); | ||
1832 | 1843 | ||
1833 | if (physical) | 1844 | if (physical) |
1834 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | 1845 | error = fiemap_fill_next_extent(fieinfo, start, physical, |
1835 | length, flags); | 1846 | inline_len, flags); |
1836 | brelse(iloc.bh); | 1847 | brelse(iloc.bh); |
1837 | out: | 1848 | out: |
1838 | up_read(&EXT4_I(inode)->xattr_sem); | 1849 | up_read(&EXT4_I(inode)->xattr_sem); |
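The inline-data fiemap change above makes ext4_inline_data_fiemap() honour the requested start and len instead of always reporting the whole inline area: the reported length is clipped first to the smaller of the inline size and i_size, then to the end of the request. The same clamping arithmetic, pulled out into a standalone user-space sketch with invented names:

/*
 * Standalone sketch of the clipping done above: report only the part of
 * the inline area that overlaps the requested [start, start + len) range.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t overlap_len(uint64_t inline_size, uint64_t isize,
			    uint64_t start, uint64_t len)
{
	uint64_t inline_len = inline_size < isize ? inline_size : isize;

	if (start >= inline_len)
		return 0;			/* nothing to report */
	if (start + len < inline_len)
		inline_len = start + len;	/* clip to end of request */
	return inline_len - start;
}

int main(void)
{
	/* 60-byte inline area, 200-byte i_size, request bytes 16..79 */
	printf("%llu\n", (unsigned long long)overlap_len(60, 200, 16, 64));
	return 0;
}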
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3356ab5395f4..5653fa42930b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
416 | } | 416 | } |
417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
418 | up_read((&EXT4_I(inode)->i_data_sem)); | 418 | up_read((&EXT4_I(inode)->i_data_sem)); |
419 | /* | ||
420 | * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag | ||
421 | * because it shouldn't be marked in es_map->m_flags. | ||
422 | */ | ||
423 | map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); | ||
424 | 419 | ||
425 | /* | 420 | /* |
426 | * We don't check m_len because extent will be collapsed in status | 421 | * We don't check m_len because extent will be collapsed in status |
@@ -491,7 +486,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
491 | 486 | ||
492 | /* Lookup extent status tree firstly */ | 487 | /* Lookup extent status tree firstly */ |
493 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
494 | ext4_es_lru_add(inode); | ||
495 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 489 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
496 | map->m_pblk = ext4_es_pblock(&es) + | 490 | map->m_pblk = ext4_es_pblock(&es) + |
497 | map->m_lblk - es.es_lblk; | 491 | map->m_lblk - es.es_lblk; |
@@ -1393,7 +1387,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1393 | 1387 | ||
1394 | /* Lookup extent status tree firstly */ | 1388 | /* Lookup extent status tree firstly */ |
1395 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1389 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1396 | ext4_es_lru_add(inode); | ||
1397 | if (ext4_es_is_hole(&es)) { | 1390 | if (ext4_es_is_hole(&es)) { |
1398 | retval = 0; | 1391 | retval = 0; |
1399 | down_read(&EXT4_I(inode)->i_data_sem); | 1392 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -1434,24 +1427,12 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1434 | * file system block. | 1427 | * file system block. |
1435 | */ | 1428 | */ |
1436 | down_read(&EXT4_I(inode)->i_data_sem); | 1429 | down_read(&EXT4_I(inode)->i_data_sem); |
1437 | if (ext4_has_inline_data(inode)) { | 1430 | if (ext4_has_inline_data(inode)) |
1438 | /* | ||
1439 | * We will soon create blocks for this page, and let | ||
1440 | * us pretend as if the blocks aren't allocated yet. | ||
1441 | * In case of clusters, we have to handle the work | ||
1442 | * of mapping from cluster so that the reserved space | ||
1443 | * is calculated properly. | ||
1444 | */ | ||
1445 | if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) && | ||
1446 | ext4_find_delalloc_cluster(inode, map->m_lblk)) | ||
1447 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
1448 | retval = 0; | 1431 | retval = 0; |
1449 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1432 | else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1450 | retval = ext4_ext_map_blocks(NULL, inode, map, | 1433 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); |
1451 | EXT4_GET_BLOCKS_NO_PUT_HOLE); | ||
1452 | else | 1434 | else |
1453 | retval = ext4_ind_map_blocks(NULL, inode, map, | 1435 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); |
1454 | EXT4_GET_BLOCKS_NO_PUT_HOLE); | ||
1455 | 1436 | ||
1456 | add_delayed: | 1437 | add_delayed: |
1457 | if (retval == 0) { | 1438 | if (retval == 0) { |
@@ -1465,7 +1446,8 @@ add_delayed: | |||
1465 | * then we don't need to reserve it again. However we still need | 1446 | * then we don't need to reserve it again. However we still need |
1466 | * to reserve metadata for every block we're going to write. | 1447 | * to reserve metadata for every block we're going to write. |
1467 | */ | 1448 | */ |
1468 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | 1449 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 || |
1450 | !ext4_find_delalloc_cluster(inode, map->m_lblk)) { | ||
1469 | ret = ext4_da_reserve_space(inode, iblock); | 1451 | ret = ext4_da_reserve_space(inode, iblock); |
1470 | if (ret) { | 1452 | if (ret) { |
1471 | /* not enough space to reserve */ | 1453 | /* not enough space to reserve */ |
@@ -1481,11 +1463,6 @@ add_delayed: | |||
1481 | goto out_unlock; | 1463 | goto out_unlock; |
1482 | } | 1464 | } |
1483 | 1465 | ||
1484 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
1485 | * and it should not appear on the bh->b_state. | ||
1486 | */ | ||
1487 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
1488 | |||
1489 | map_bh(bh, inode->i_sb, invalid_block); | 1466 | map_bh(bh, inode->i_sb, invalid_block); |
1490 | set_buffer_new(bh); | 1467 | set_buffer_new(bh); |
1491 | set_buffer_delay(bh); | 1468 | set_buffer_delay(bh); |
@@ -3643,7 +3620,7 @@ out_stop: | |||
3643 | * If this was a simple ftruncate() and the file will remain alive, | 3620 | * If this was a simple ftruncate() and the file will remain alive, |
3644 | * then we need to clear up the orphan record which we created above. | 3621 | * then we need to clear up the orphan record which we created above. |
3645 | * However, if this was a real unlink then we were called by | 3622 | * However, if this was a real unlink then we were called by |
3646 | * ext4_delete_inode(), and we allow that function to clean up the | 3623 | * ext4_evict_inode(), and we allow that function to clean up the |
3647 | * orphan info for us. | 3624 | * orphan info for us. |
3648 | */ | 3625 | */ |
3649 | if (inode->i_nlink) | 3626 | if (inode->i_nlink) |
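The inode.c hunks above drop the EXT4_MAP_FROM_CLUSTER round trip: instead of tagging the map during lookup and clearing the flag again afterwards, ext4_da_map_blocks() now decides at reservation time whether the block's cluster already carries a delayed allocation. The ext4_es_lru_add() calls disappear as well, consistent with the extent-status LRU removal visible in the super.c hunks further down. A minimal sketch of the new reservation decision; the helper name da_reserve_needed() is hypothetical, the real code inlines the test exactly as shown in the hunk:

    /*
     * Hypothetical helper: with bigalloc (cluster_ratio > 1) a data block
     * only needs a fresh reservation when its cluster has no delayed
     * allocation yet; without bigalloc every block gets its own reservation.
     */
    static int da_reserve_needed(struct inode *inode, ext4_lblk_t lblk)
    {
            if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1)
                    return 1;
            return !ext4_find_delalloc_cluster(inode, lblk);
    }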
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bfda18a15592..f58a0d106726 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -78,8 +78,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); | 79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); | 80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
81 | ext4_es_lru_del(inode1); | ||
82 | ext4_es_lru_del(inode2); | ||
83 | 81 | ||
84 | isize = i_size_read(inode1); | 82 | isize = i_size_read(inode1); |
85 | i_size_write(inode1, i_size_read(inode2)); | 83 | i_size_write(inode1, i_size_read(inode2)); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dbfe15c2533c..8d1e60214ef0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) | |||
2358 | if (sbi->s_group_info) { | 2358 | if (sbi->s_group_info) { |
2359 | memcpy(new_groupinfo, sbi->s_group_info, | 2359 | memcpy(new_groupinfo, sbi->s_group_info, |
2360 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); | 2360 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); |
2361 | ext4_kvfree(sbi->s_group_info); | 2361 | kvfree(sbi->s_group_info); |
2362 | } | 2362 | } |
2363 | sbi->s_group_info = new_groupinfo; | 2363 | sbi->s_group_info = new_groupinfo; |
2364 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); | 2364 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); |
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2385 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { | 2385 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { |
2386 | metalen = sizeof(*meta_group_info) << | 2386 | metalen = sizeof(*meta_group_info) << |
2387 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2387 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2388 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2388 | meta_group_info = kmalloc(metalen, GFP_NOFS); |
2389 | if (meta_group_info == NULL) { | 2389 | if (meta_group_info == NULL) { |
2390 | ext4_msg(sb, KERN_ERR, "can't allocate mem " | 2390 | ext4_msg(sb, KERN_ERR, "can't allocate mem " |
2391 | "for a buddy group"); | 2391 | "for a buddy group"); |
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2399 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2399 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2400 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2400 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2401 | 2401 | ||
2402 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); | 2402 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); |
2403 | if (meta_group_info[i] == NULL) { | 2403 | if (meta_group_info[i] == NULL) { |
2404 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); | 2404 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2405 | goto exit_group_info; | 2405 | goto exit_group_info; |
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2428 | { | 2428 | { |
2429 | struct buffer_head *bh; | 2429 | struct buffer_head *bh; |
2430 | meta_group_info[i]->bb_bitmap = | 2430 | meta_group_info[i]->bb_bitmap = |
2431 | kmalloc(sb->s_blocksize, GFP_KERNEL); | 2431 | kmalloc(sb->s_blocksize, GFP_NOFS); |
2432 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); | 2432 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); |
2433 | bh = ext4_read_block_bitmap(sb, group); | 2433 | bh = ext4_read_block_bitmap(sb, group); |
2434 | BUG_ON(bh == NULL); | 2434 | BUG_ON(bh == NULL); |
@@ -2495,7 +2495,7 @@ err_freebuddy: | |||
2495 | kfree(sbi->s_group_info[i]); | 2495 | kfree(sbi->s_group_info[i]); |
2496 | iput(sbi->s_buddy_cache); | 2496 | iput(sbi->s_buddy_cache); |
2497 | err_freesgi: | 2497 | err_freesgi: |
2498 | ext4_kvfree(sbi->s_group_info); | 2498 | kvfree(sbi->s_group_info); |
2499 | return -ENOMEM; | 2499 | return -ENOMEM; |
2500 | } | 2500 | } |
2501 | 2501 | ||
@@ -2708,12 +2708,11 @@ int ext4_mb_release(struct super_block *sb) | |||
2708 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2708 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2709 | for (i = 0; i < num_meta_group_infos; i++) | 2709 | for (i = 0; i < num_meta_group_infos; i++) |
2710 | kfree(sbi->s_group_info[i]); | 2710 | kfree(sbi->s_group_info[i]); |
2711 | ext4_kvfree(sbi->s_group_info); | 2711 | kvfree(sbi->s_group_info); |
2712 | } | 2712 | } |
2713 | kfree(sbi->s_mb_offsets); | 2713 | kfree(sbi->s_mb_offsets); |
2714 | kfree(sbi->s_mb_maxs); | 2714 | kfree(sbi->s_mb_maxs); |
2715 | if (sbi->s_buddy_cache) | 2715 | iput(sbi->s_buddy_cache); |
2716 | iput(sbi->s_buddy_cache); | ||
2717 | if (sbi->s_mb_stats) { | 2716 | if (sbi->s_mb_stats) { |
2718 | ext4_msg(sb, KERN_INFO, | 2717 | ext4_msg(sb, KERN_INFO, |
2719 | "mballoc: %u blocks %u reqs (%u success)", | 2718 | "mballoc: %u blocks %u reqs (%u success)", |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a432634f2e6a..3cb267aee802 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -592,7 +592,7 @@ err_out: | |||
592 | 592 | ||
593 | /* | 593 | /* |
594 | * set the i_blocks count to zero | 594 | * set the i_blocks count to zero |
595 | * so that the ext4_delete_inode does the | 595 | * so that the ext4_evict_inode() does the |
596 | * right job | 596 | * right job |
597 | * | 597 | * |
598 | * We don't need to take the i_lock because | 598 | * We don't need to take the i_lock because |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9f2311bc9c4f..503ea15dc5db 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -273,6 +273,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
273 | int replaced_count = 0; | 273 | int replaced_count = 0; |
274 | int from = data_offset_in_page << orig_inode->i_blkbits; | 274 | int from = data_offset_in_page << orig_inode->i_blkbits; |
275 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 275 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
276 | struct super_block *sb = orig_inode->i_sb; | ||
276 | 277 | ||
277 | /* | 278 | /* |
278 | * It needs twice the amount of ordinary journal buffers because | 279 | * It needs twice the amount of ordinary journal buffers because |
@@ -405,10 +406,13 @@ unlock_pages: | |||
405 | page_cache_release(pagep[1]); | 406 | page_cache_release(pagep[1]); |
406 | stop_journal: | 407 | stop_journal: |
407 | ext4_journal_stop(handle); | 408 | ext4_journal_stop(handle); |
409 | if (*err == -ENOSPC && | ||
410 | ext4_should_retry_alloc(sb, &retries)) | ||
411 | goto again; | ||
408 | /* Buffer was busy because probably is pinned to journal transaction, | 412 | /* Buffer was busy because probably is pinned to journal transaction, |
409 | * force transaction commit may help to free it. */ | 413 | * force transaction commit may help to free it. */ |
410 | if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, | 414 | if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal && |
411 | &retries)) | 415 | jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal)) |
412 | goto again; | 416 | goto again; |
413 | return replaced_count; | 417 | return replaced_count; |
414 | 418 | ||
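move_extent_per_page() previously retried only on -EBUSY, and did so via ext4_should_retry_alloc(), which is really an out-of-space helper. The hunk splits the two cases: -ENOSPC now goes through ext4_should_retry_alloc(), while -EBUSY (a buffer probably pinned to a running transaction) forces a journal commit a bounded number of times. A hedged sketch of that policy as a helper; the name mext_should_retry() is invented here, the real code open-codes both checks right after ext4_journal_stop():

    static bool mext_should_retry(struct super_block *sb, int err, int *retries)
    {
            /* Out of space: wait for a committing transaction to free blocks. */
            if (err == -ENOSPC && ext4_should_retry_alloc(sb, retries))
                    return true;

            /* Buffer pinned to the journal: a forced commit may release it. */
            if (err == -EBUSY && (*retries)++ < 4 && EXT4_SB(sb)->s_journal &&
                jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
                    return true;

            return false;
    }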
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 426211882f72..2291923dae4e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2814,7 +2814,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2814 | ext4_orphan_add(handle, inode); | 2814 | ext4_orphan_add(handle, inode); |
2815 | inode->i_ctime = ext4_current_time(inode); | 2815 | inode->i_ctime = ext4_current_time(inode); |
2816 | ext4_mark_inode_dirty(handle, inode); | 2816 | ext4_mark_inode_dirty(handle, inode); |
2817 | retval = 0; | ||
2818 | 2817 | ||
2819 | end_unlink: | 2818 | end_unlink: |
2820 | brelse(bh); | 2819 | brelse(bh); |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ca4588388fc3..bf76f405a5f9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -856,7 +856,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
856 | n_group_desc[gdb_num] = gdb_bh; | 856 | n_group_desc[gdb_num] = gdb_bh; |
857 | EXT4_SB(sb)->s_group_desc = n_group_desc; | 857 | EXT4_SB(sb)->s_group_desc = n_group_desc; |
858 | EXT4_SB(sb)->s_gdb_count++; | 858 | EXT4_SB(sb)->s_gdb_count++; |
859 | ext4_kvfree(o_group_desc); | 859 | kvfree(o_group_desc); |
860 | 860 | ||
861 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); | 861 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); |
862 | err = ext4_handle_dirty_super(handle, sb); | 862 | err = ext4_handle_dirty_super(handle, sb); |
@@ -866,7 +866,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
866 | return err; | 866 | return err; |
867 | 867 | ||
868 | exit_inode: | 868 | exit_inode: |
869 | ext4_kvfree(n_group_desc); | 869 | kvfree(n_group_desc); |
870 | brelse(iloc.bh); | 870 | brelse(iloc.bh); |
871 | exit_dind: | 871 | exit_dind: |
872 | brelse(dind); | 872 | brelse(dind); |
@@ -909,7 +909,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, | |||
909 | n_group_desc[gdb_num] = gdb_bh; | 909 | n_group_desc[gdb_num] = gdb_bh; |
910 | EXT4_SB(sb)->s_group_desc = n_group_desc; | 910 | EXT4_SB(sb)->s_group_desc = n_group_desc; |
911 | EXT4_SB(sb)->s_gdb_count++; | 911 | EXT4_SB(sb)->s_gdb_count++; |
912 | ext4_kvfree(o_group_desc); | 912 | kvfree(o_group_desc); |
913 | BUFFER_TRACE(gdb_bh, "get_write_access"); | 913 | BUFFER_TRACE(gdb_bh, "get_write_access"); |
914 | err = ext4_journal_get_write_access(handle, gdb_bh); | 914 | err = ext4_journal_get_write_access(handle, gdb_bh); |
915 | if (unlikely(err)) | 915 | if (unlikely(err)) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 63e802b8ec68..43c92b1685cb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -176,15 +176,6 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) | |||
176 | return ret; | 176 | return ret; |
177 | } | 177 | } |
178 | 178 | ||
179 | void ext4_kvfree(void *ptr) | ||
180 | { | ||
181 | if (is_vmalloc_addr(ptr)) | ||
182 | vfree(ptr); | ||
183 | else | ||
184 | kfree(ptr); | ||
185 | |||
186 | } | ||
187 | |||
188 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 179 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
189 | struct ext4_group_desc *bg) | 180 | struct ext4_group_desc *bg) |
190 | { | 181 | { |
@@ -811,8 +802,8 @@ static void ext4_put_super(struct super_block *sb) | |||
811 | 802 | ||
812 | for (i = 0; i < sbi->s_gdb_count; i++) | 803 | for (i = 0; i < sbi->s_gdb_count; i++) |
813 | brelse(sbi->s_group_desc[i]); | 804 | brelse(sbi->s_group_desc[i]); |
814 | ext4_kvfree(sbi->s_group_desc); | 805 | kvfree(sbi->s_group_desc); |
815 | ext4_kvfree(sbi->s_flex_groups); | 806 | kvfree(sbi->s_flex_groups); |
816 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 807 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
817 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 808 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
818 | percpu_counter_destroy(&sbi->s_dirs_counter); | 809 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -880,10 +871,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
880 | spin_lock_init(&ei->i_prealloc_lock); | 871 | spin_lock_init(&ei->i_prealloc_lock); |
881 | ext4_es_init_tree(&ei->i_es_tree); | 872 | ext4_es_init_tree(&ei->i_es_tree); |
882 | rwlock_init(&ei->i_es_lock); | 873 | rwlock_init(&ei->i_es_lock); |
883 | INIT_LIST_HEAD(&ei->i_es_lru); | 874 | INIT_LIST_HEAD(&ei->i_es_list); |
884 | ei->i_es_all_nr = 0; | 875 | ei->i_es_all_nr = 0; |
885 | ei->i_es_lru_nr = 0; | 876 | ei->i_es_shk_nr = 0; |
886 | ei->i_touch_when = 0; | 877 | ei->i_es_shrink_lblk = 0; |
887 | ei->i_reserved_data_blocks = 0; | 878 | ei->i_reserved_data_blocks = 0; |
888 | ei->i_reserved_meta_blocks = 0; | 879 | ei->i_reserved_meta_blocks = 0; |
889 | ei->i_allocated_meta_blocks = 0; | 880 | ei->i_allocated_meta_blocks = 0; |
@@ -973,7 +964,6 @@ void ext4_clear_inode(struct inode *inode) | |||
973 | dquot_drop(inode); | 964 | dquot_drop(inode); |
974 | ext4_discard_preallocations(inode); | 965 | ext4_discard_preallocations(inode); |
975 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 966 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
976 | ext4_es_lru_del(inode); | ||
977 | if (EXT4_I(inode)->jinode) { | 967 | if (EXT4_I(inode)->jinode) { |
978 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
979 | EXT4_I(inode)->jinode); | 969 | EXT4_I(inode)->jinode); |
@@ -1153,7 +1143,7 @@ enum { | |||
1153 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1143 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1154 | Opt_dioread_nolock, Opt_dioread_lock, | 1144 | Opt_dioread_nolock, Opt_dioread_lock, |
1155 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, | 1145 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, |
1156 | Opt_max_dir_size_kb, | 1146 | Opt_max_dir_size_kb, Opt_nojournal_checksum, |
1157 | }; | 1147 | }; |
1158 | 1148 | ||
1159 | static const match_table_t tokens = { | 1149 | static const match_table_t tokens = { |
@@ -1187,6 +1177,7 @@ static const match_table_t tokens = { | |||
1187 | {Opt_journal_dev, "journal_dev=%u"}, | 1177 | {Opt_journal_dev, "journal_dev=%u"}, |
1188 | {Opt_journal_path, "journal_path=%s"}, | 1178 | {Opt_journal_path, "journal_path=%s"}, |
1189 | {Opt_journal_checksum, "journal_checksum"}, | 1179 | {Opt_journal_checksum, "journal_checksum"}, |
1180 | {Opt_nojournal_checksum, "nojournal_checksum"}, | ||
1190 | {Opt_journal_async_commit, "journal_async_commit"}, | 1181 | {Opt_journal_async_commit, "journal_async_commit"}, |
1191 | {Opt_abort, "abort"}, | 1182 | {Opt_abort, "abort"}, |
1192 | {Opt_data_journal, "data=journal"}, | 1183 | {Opt_data_journal, "data=journal"}, |
@@ -1368,6 +1359,8 @@ static const struct mount_opts { | |||
1368 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, | 1359 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, |
1369 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, | 1360 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, |
1370 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | 1361 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1362 | {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | ||
1363 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | ||
1371 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1364 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1372 | MOPT_EXT4_ONLY | MOPT_SET}, | 1365 | MOPT_EXT4_ONLY | MOPT_SET}, |
1373 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | | 1366 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
@@ -1709,6 +1702,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1709 | return 0; | 1702 | return 0; |
1710 | } | 1703 | } |
1711 | } | 1704 | } |
1705 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && | ||
1706 | test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | ||
1707 | ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " | ||
1708 | "in data=ordered mode"); | ||
1709 | return 0; | ||
1710 | } | ||
1712 | return 1; | 1711 | return 1; |
1713 | } | 1712 | } |
1714 | 1713 | ||
@@ -1946,7 +1945,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) | |||
1946 | memcpy(new_groups, sbi->s_flex_groups, | 1945 | memcpy(new_groups, sbi->s_flex_groups, |
1947 | (sbi->s_flex_groups_allocated * | 1946 | (sbi->s_flex_groups_allocated * |
1948 | sizeof(struct flex_groups))); | 1947 | sizeof(struct flex_groups))); |
1949 | ext4_kvfree(sbi->s_flex_groups); | 1948 | kvfree(sbi->s_flex_groups); |
1950 | } | 1949 | } |
1951 | sbi->s_flex_groups = new_groups; | 1950 | sbi->s_flex_groups = new_groups; |
1952 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); | 1951 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); |
@@ -3317,7 +3316,7 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3317 | struct ext4_super_block *es = sbi->s_es; | 3316 | struct ext4_super_block *es = sbi->s_es; |
3318 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 3317 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3319 | ext4_fsblk_t overhead = 0; | 3318 | ext4_fsblk_t overhead = 0; |
3320 | char *buf = (char *) get_zeroed_page(GFP_KERNEL); | 3319 | char *buf = (char *) get_zeroed_page(GFP_NOFS); |
3321 | 3320 | ||
3322 | if (!buf) | 3321 | if (!buf) |
3323 | return -ENOMEM; | 3322 | return -ENOMEM; |
@@ -3345,8 +3344,8 @@ int ext4_calculate_overhead(struct super_block *sb) | |||
3345 | memset(buf, 0, PAGE_SIZE); | 3344 | memset(buf, 0, PAGE_SIZE); |
3346 | cond_resched(); | 3345 | cond_resched(); |
3347 | } | 3346 | } |
3348 | /* Add the journal blocks as well */ | 3347 | /* Add the internal journal blocks as well */ |
3349 | if (sbi->s_journal) | 3348 | if (sbi->s_journal && !sbi->journal_bdev) |
3350 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); | 3349 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); |
3351 | 3350 | ||
3352 | sbi->s_overhead = overhead; | 3351 | sbi->s_overhead = overhead; |
@@ -4232,7 +4231,7 @@ failed_mount7: | |||
4232 | failed_mount6: | 4231 | failed_mount6: |
4233 | ext4_mb_release(sb); | 4232 | ext4_mb_release(sb); |
4234 | if (sbi->s_flex_groups) | 4233 | if (sbi->s_flex_groups) |
4235 | ext4_kvfree(sbi->s_flex_groups); | 4234 | kvfree(sbi->s_flex_groups); |
4236 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 4235 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
4237 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 4236 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
4238 | percpu_counter_destroy(&sbi->s_dirs_counter); | 4237 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -4261,7 +4260,7 @@ failed_mount3: | |||
4261 | failed_mount2: | 4260 | failed_mount2: |
4262 | for (i = 0; i < db_count; i++) | 4261 | for (i = 0; i < db_count; i++) |
4263 | brelse(sbi->s_group_desc[i]); | 4262 | brelse(sbi->s_group_desc[i]); |
4264 | ext4_kvfree(sbi->s_group_desc); | 4263 | kvfree(sbi->s_group_desc); |
4265 | failed_mount: | 4264 | failed_mount: |
4266 | if (sbi->s_chksum_driver) | 4265 | if (sbi->s_chksum_driver) |
4267 | crypto_free_shash(sbi->s_chksum_driver); | 4266 | crypto_free_shash(sbi->s_chksum_driver); |
@@ -4862,6 +4861,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4862 | goto restore_opts; | 4861 | goto restore_opts; |
4863 | } | 4862 | } |
4864 | 4863 | ||
4864 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | ||
4865 | test_opt(sb, JOURNAL_CHECKSUM)) { | ||
4866 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | ||
4867 | "during remount not supported"); | ||
4868 | err = -EINVAL; | ||
4869 | goto restore_opts; | ||
4870 | } | ||
4871 | |||
4865 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 4872 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
4866 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 4873 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
4867 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4874 | ext4_msg(sb, KERN_ERR, "can't mount with " |
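The super.c changes bundle several things: the ext4_kvfree() definition and its call sites move to the generic kvfree(); the per-inode extent-status fields are renamed (i_es_lru becomes i_es_list, i_es_lru_nr becomes i_es_shk_nr), matching the ext4_es_lru_add()/ext4_es_lru_del() removals elsewhere in this diff; a new nojournal_checksum mount option clears EXT4_MOUNT_JOURNAL_CHECKSUM; journal_async_commit is refused in data=ordered mode; ext4_calculate_overhead() counts journal blocks only for an internal journal and allocates its scratch page with GFP_NOFS; and remount may no longer flip journal_checksum, since the running journal was set up with the old value. A small illustrative sketch of that last check, with a hypothetical helper name (the real code compares old_opts.s_mount_opt against test_opt() directly, as in the hunk):

    /* Illustration only: did a particular mount flag change between the
     * saved old options and the freshly parsed ones? */
    static inline bool mount_flag_flipped(unsigned int old_opt,
                                          unsigned int new_opt,
                                          unsigned int flag)
    {
            return ((old_opt ^ new_opt) & flag) != 0;
    }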
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1df94fabe4eb..b96bd8076b70 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1714,8 +1714,7 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1714 | 1714 | ||
1715 | if (journal->j_proc_entry) | 1715 | if (journal->j_proc_entry) |
1716 | jbd2_stats_proc_exit(journal); | 1716 | jbd2_stats_proc_exit(journal); |
1717 | if (journal->j_inode) | 1717 | iput(journal->j_inode); |
1718 | iput(journal->j_inode); | ||
1719 | if (journal->j_revoke) | 1718 | if (journal->j_revoke) |
1720 | jbd2_journal_destroy_revoke(journal); | 1719 | jbd2_journal_destroy_revoke(journal); |
1721 | if (journal->j_chksum_driver) | 1720 | if (journal->j_chksum_driver) |
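The jbd2 hunk is the same micro-cleanup seen in ext4_mb_release() above: iput() already tolerates a NULL inode, so guarding the call is redundant. A simplified sketch of why that is safe; the real iput() in fs/inode.c opens with exactly this kind of NULL check, the name below is just a stand-in:

    /* iput_sketch(): callers may pass a possibly-NULL inode pointer. */
    static void iput_sketch(struct inode *inode)
    {
            if (!inode)
                    return;
            /* ... drop the last reference, possibly evicting the inode ... */
    }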
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index ec881b312700..2f389ce5023c 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -61,6 +61,11 @@ module_param(mem_size, ulong, 0400); | |||
61 | MODULE_PARM_DESC(mem_size, | 61 | MODULE_PARM_DESC(mem_size, |
62 | "size of reserved RAM used to store oops/panic logs"); | 62 | "size of reserved RAM used to store oops/panic logs"); |
63 | 63 | ||
64 | static unsigned int mem_type; | ||
65 | module_param(mem_type, uint, 0600); | ||
66 | MODULE_PARM_DESC(mem_type, | ||
67 | "set to 1 to try to use unbuffered memory (default 0)"); | ||
68 | |||
64 | static int dump_oops = 1; | 69 | static int dump_oops = 1; |
65 | module_param(dump_oops, int, 0600); | 70 | module_param(dump_oops, int, 0600); |
66 | MODULE_PARM_DESC(dump_oops, | 71 | MODULE_PARM_DESC(dump_oops, |
@@ -79,6 +84,7 @@ struct ramoops_context { | |||
79 | struct persistent_ram_zone *fprz; | 84 | struct persistent_ram_zone *fprz; |
80 | phys_addr_t phys_addr; | 85 | phys_addr_t phys_addr; |
81 | unsigned long size; | 86 | unsigned long size; |
87 | unsigned int memtype; | ||
82 | size_t record_size; | 88 | size_t record_size; |
83 | size_t console_size; | 89 | size_t console_size; |
84 | size_t ftrace_size; | 90 | size_t ftrace_size; |
@@ -366,7 +372,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, | |||
366 | size_t sz = cxt->record_size; | 372 | size_t sz = cxt->record_size; |
367 | 373 | ||
368 | cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, | 374 | cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, |
369 | &cxt->ecc_info); | 375 | &cxt->ecc_info, |
376 | cxt->memtype); | ||
370 | if (IS_ERR(cxt->przs[i])) { | 377 | if (IS_ERR(cxt->przs[i])) { |
371 | err = PTR_ERR(cxt->przs[i]); | 378 | err = PTR_ERR(cxt->przs[i]); |
372 | dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", | 379 | dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", |
@@ -396,7 +403,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, | |||
396 | return -ENOMEM; | 403 | return -ENOMEM; |
397 | } | 404 | } |
398 | 405 | ||
399 | *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info); | 406 | *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); |
400 | if (IS_ERR(*prz)) { | 407 | if (IS_ERR(*prz)) { |
401 | int err = PTR_ERR(*prz); | 408 | int err = PTR_ERR(*prz); |
402 | 409 | ||
@@ -443,6 +450,7 @@ static int ramoops_probe(struct platform_device *pdev) | |||
443 | 450 | ||
444 | cxt->size = pdata->mem_size; | 451 | cxt->size = pdata->mem_size; |
445 | cxt->phys_addr = pdata->mem_address; | 452 | cxt->phys_addr = pdata->mem_address; |
453 | cxt->memtype = pdata->mem_type; | ||
446 | cxt->record_size = pdata->record_size; | 454 | cxt->record_size = pdata->record_size; |
447 | cxt->console_size = pdata->console_size; | 455 | cxt->console_size = pdata->console_size; |
448 | cxt->ftrace_size = pdata->ftrace_size; | 456 | cxt->ftrace_size = pdata->ftrace_size; |
@@ -572,6 +580,7 @@ static void ramoops_register_dummy(void) | |||
572 | 580 | ||
573 | dummy_data->mem_size = mem_size; | 581 | dummy_data->mem_size = mem_size; |
574 | dummy_data->mem_address = mem_address; | 582 | dummy_data->mem_address = mem_address; |
583 | dummy_data->mem_type = 0; | ||
575 | dummy_data->record_size = record_size; | 584 | dummy_data->record_size = record_size; |
576 | dummy_data->console_size = ramoops_console_size; | 585 | dummy_data->console_size = ramoops_console_size; |
577 | dummy_data->ftrace_size = ramoops_ftrace_size; | 586 | dummy_data->ftrace_size = ramoops_ftrace_size; |
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 9d7b9a83699e..76c3f80efdfa 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c | |||
@@ -380,7 +380,8 @@ void persistent_ram_zap(struct persistent_ram_zone *prz) | |||
380 | persistent_ram_update_header_ecc(prz); | 380 | persistent_ram_update_header_ecc(prz); |
381 | } | 381 | } |
382 | 382 | ||
383 | static void *persistent_ram_vmap(phys_addr_t start, size_t size) | 383 | static void *persistent_ram_vmap(phys_addr_t start, size_t size, |
384 | unsigned int memtype) | ||
384 | { | 385 | { |
385 | struct page **pages; | 386 | struct page **pages; |
386 | phys_addr_t page_start; | 387 | phys_addr_t page_start; |
@@ -392,7 +393,10 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) | |||
392 | page_start = start - offset_in_page(start); | 393 | page_start = start - offset_in_page(start); |
393 | page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); | 394 | page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); |
394 | 395 | ||
395 | prot = pgprot_noncached(PAGE_KERNEL); | 396 | if (memtype) |
397 | prot = pgprot_noncached(PAGE_KERNEL); | ||
398 | else | ||
399 | prot = pgprot_writecombine(PAGE_KERNEL); | ||
396 | 400 | ||
397 | pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); | 401 | pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); |
398 | if (!pages) { | 402 | if (!pages) { |
@@ -411,8 +415,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) | |||
411 | return vaddr; | 415 | return vaddr; |
412 | } | 416 | } |
413 | 417 | ||
414 | static void *persistent_ram_iomap(phys_addr_t start, size_t size) | 418 | static void *persistent_ram_iomap(phys_addr_t start, size_t size, |
419 | unsigned int memtype) | ||
415 | { | 420 | { |
421 | void *va; | ||
422 | |||
416 | if (!request_mem_region(start, size, "persistent_ram")) { | 423 | if (!request_mem_region(start, size, "persistent_ram")) { |
417 | pr_err("request mem region (0x%llx@0x%llx) failed\n", | 424 | pr_err("request mem region (0x%llx@0x%llx) failed\n", |
418 | (unsigned long long)size, (unsigned long long)start); | 425 | (unsigned long long)size, (unsigned long long)start); |
@@ -422,19 +429,24 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) | |||
422 | buffer_start_add = buffer_start_add_locked; | 429 | buffer_start_add = buffer_start_add_locked; |
423 | buffer_size_add = buffer_size_add_locked; | 430 | buffer_size_add = buffer_size_add_locked; |
424 | 431 | ||
425 | return ioremap(start, size); | 432 | if (memtype) |
433 | va = ioremap(start, size); | ||
434 | else | ||
435 | va = ioremap_wc(start, size); | ||
436 | |||
437 | return va; | ||
426 | } | 438 | } |
427 | 439 | ||
428 | static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, | 440 | static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, |
429 | struct persistent_ram_zone *prz) | 441 | struct persistent_ram_zone *prz, int memtype) |
430 | { | 442 | { |
431 | prz->paddr = start; | 443 | prz->paddr = start; |
432 | prz->size = size; | 444 | prz->size = size; |
433 | 445 | ||
434 | if (pfn_valid(start >> PAGE_SHIFT)) | 446 | if (pfn_valid(start >> PAGE_SHIFT)) |
435 | prz->vaddr = persistent_ram_vmap(start, size); | 447 | prz->vaddr = persistent_ram_vmap(start, size, memtype); |
436 | else | 448 | else |
437 | prz->vaddr = persistent_ram_iomap(start, size); | 449 | prz->vaddr = persistent_ram_iomap(start, size, memtype); |
438 | 450 | ||
439 | if (!prz->vaddr) { | 451 | if (!prz->vaddr) { |
440 | pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__, | 452 | pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__, |
@@ -500,7 +512,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) | |||
500 | } | 512 | } |
501 | 513 | ||
502 | struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, | 514 | struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, |
503 | u32 sig, struct persistent_ram_ecc_info *ecc_info) | 515 | u32 sig, struct persistent_ram_ecc_info *ecc_info, |
516 | unsigned int memtype) | ||
504 | { | 517 | { |
505 | struct persistent_ram_zone *prz; | 518 | struct persistent_ram_zone *prz; |
506 | int ret = -ENOMEM; | 519 | int ret = -ENOMEM; |
@@ -511,7 +524,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, | |||
511 | goto err; | 524 | goto err; |
512 | } | 525 | } |
513 | 526 | ||
514 | ret = persistent_ram_buffer_map(start, size, prz); | 527 | ret = persistent_ram_buffer_map(start, size, prz, memtype); |
515 | if (ret) | 528 | if (ret) |
516 | goto err; | 529 | goto err; |
517 | 530 | ||
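Taken together, the ram.c and ram_core.c hunks add a mem_type knob (module parameter plus platform-data field) and thread it down to the low-level mapping code: 1 requests a strongly uncached mapping as before, while 0 (the new default) asks for a write-combined mapping, which is usually faster for the streaming log writes pstore does. A condensed sketch of the selection for the reserved-memory case; the helper name prz_ioremap_sketch() is invented, and the page-backed path makes the analogous choice between pgprot_noncached() and pgprot_writecombine() before vmap(), as shown in the hunks:

    /* Pick the cache attribute for a reserved persistent-RAM region. */
    static void __iomem *prz_ioremap_sketch(phys_addr_t start, size_t size,
                                            unsigned int memtype)
    {
            if (memtype)
                    return ioremap(start, size);    /* strongly uncached */
            return ioremap_wc(start, size);         /* write-combined */
    }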
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index d571e173a990..9d6486d416a3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2772,7 +2772,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2772 | 2772 | ||
2773 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { | 2773 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { |
2774 | reiserfs_warning(sb, "sh-462", | 2774 | reiserfs_warning(sb, "sh-462", |
2775 | "unable to initialize jornal device"); | 2775 | "unable to initialize journal device"); |
2776 | goto free_and_return; | 2776 | goto free_and_return; |
2777 | } | 2777 | } |
2778 | 2778 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b5b593c45270..538519ee37d9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -262,6 +262,7 @@ static int write_begin_slow(struct address_space *mapping, | |||
262 | if (err) { | 262 | if (err) { |
263 | unlock_page(page); | 263 | unlock_page(page); |
264 | page_cache_release(page); | 264 | page_cache_release(page); |
265 | ubifs_release_budget(c, &req); | ||
265 | return err; | 266 | return err; |
266 | } | 267 | } |
267 | } | 268 | } |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index fb166e204441..f6ac3f29323c 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -571,7 +571,11 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | |||
571 | 571 | ||
572 | aligned_dlen = ALIGN(dlen, 8); | 572 | aligned_dlen = ALIGN(dlen, 8); |
573 | aligned_ilen = ALIGN(ilen, 8); | 573 | aligned_ilen = ALIGN(ilen, 8); |
574 | |||
574 | len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; | 575 | len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; |
576 | /* Make sure to also account for extended attributes */ | ||
577 | len += host_ui->data_len; | ||
578 | |||
575 | dent = kmalloc(len, GFP_NOFS); | 579 | dent = kmalloc(len, GFP_NOFS); |
576 | if (!dent) | 580 | if (!dent) |
577 | return -ENOMEM; | 581 | return -ENOMEM; |
@@ -648,7 +652,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | |||
648 | 652 | ||
649 | ino_key_init(c, &ino_key, dir->i_ino); | 653 | ino_key_init(c, &ino_key, dir->i_ino); |
650 | ino_offs += aligned_ilen; | 654 | ino_offs += aligned_ilen; |
651 | err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ); | 655 | err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, |
656 | UBIFS_INO_NODE_SZ + host_ui->data_len); | ||
652 | if (err) | 657 | if (err) |
653 | goto out_ro; | 658 | goto out_ro; |
654 | 659 | ||
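The ubifs_jnl_update() fix makes both the journal-head buffer length and the TNC entry for the host inode account for host_ui->data_len, i.e. the inline extended-attribute payload written along with the inode node; previously both used the bare UBIFS_INO_NODE_SZ and could under-size the write. (The file.c hunk just before it apparently plugs a related leak on an error path by releasing the budget taken earlier in write_begin_slow().) A tiny hedged sketch of the corrected sizing, using a hypothetical helper; the real code simply adds the term inline as shown above:

    /* On-flash size of an inode node: fixed header plus inline data
     * (for a host inode that data is its extended-attribute payload). */
    static int ino_node_len_sketch(const struct ubifs_inode *ui)
    {
            return UBIFS_INO_NODE_SZ + ui->data_len;
    }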
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h deleted file mode 100644 index 6e247a99f5db..000000000000 --- a/fs/xfs/libxfs/xfs_ag.h +++ /dev/null | |||
@@ -1,281 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_AG_H__ | ||
19 | #define __XFS_AG_H__ | ||
20 | |||
21 | /* | ||
22 | * Allocation group header | ||
23 | * This is divided into three structures, placed in sequential 512-byte | ||
24 | * buffers after a copy of the superblock (also in a 512-byte buffer). | ||
25 | */ | ||
26 | |||
27 | struct xfs_buf; | ||
28 | struct xfs_mount; | ||
29 | struct xfs_trans; | ||
30 | |||
31 | #define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ | ||
32 | #define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ | ||
33 | #define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */ | ||
34 | #define XFS_AGF_VERSION 1 | ||
35 | #define XFS_AGI_VERSION 1 | ||
36 | |||
37 | #define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION) | ||
38 | #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) | ||
39 | |||
40 | /* | ||
41 | * Btree number 0 is bno, 1 is cnt. This value gives the size of the | ||
42 | * arrays below. | ||
43 | */ | ||
44 | #define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) | ||
45 | |||
46 | /* | ||
47 | * The second word of agf_levels in the first a.g. overlaps the EFS | ||
48 | * superblock's magic number. Since the magic numbers valid for EFS | ||
49 | * are > 64k, our value cannot be confused for an EFS superblock's. | ||
50 | */ | ||
51 | |||
52 | typedef struct xfs_agf { | ||
53 | /* | ||
54 | * Common allocation group header information | ||
55 | */ | ||
56 | __be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */ | ||
57 | __be32 agf_versionnum; /* header version == XFS_AGF_VERSION */ | ||
58 | __be32 agf_seqno; /* sequence # starting from 0 */ | ||
59 | __be32 agf_length; /* size in blocks of a.g. */ | ||
60 | /* | ||
61 | * Freespace information | ||
62 | */ | ||
63 | __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ | ||
64 | __be32 agf_spare0; /* spare field */ | ||
65 | __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ | ||
66 | __be32 agf_spare1; /* spare field */ | ||
67 | |||
68 | __be32 agf_flfirst; /* first freelist block's index */ | ||
69 | __be32 agf_fllast; /* last freelist block's index */ | ||
70 | __be32 agf_flcount; /* count of blocks in freelist */ | ||
71 | __be32 agf_freeblks; /* total free blocks */ | ||
72 | |||
73 | __be32 agf_longest; /* longest free space */ | ||
74 | __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ | ||
75 | uuid_t agf_uuid; /* uuid of filesystem */ | ||
76 | |||
77 | /* | ||
78 | * reserve some contiguous space for future logged fields before we add | ||
79 | * the unlogged fields. This makes the range logging via flags and | ||
80 | * structure offsets much simpler. | ||
81 | */ | ||
82 | __be64 agf_spare64[16]; | ||
83 | |||
84 | /* unlogged fields, written during buffer writeback. */ | ||
85 | __be64 agf_lsn; /* last write sequence */ | ||
86 | __be32 agf_crc; /* crc of agf sector */ | ||
87 | __be32 agf_spare2; | ||
88 | |||
89 | /* structure must be padded to 64 bit alignment */ | ||
90 | } xfs_agf_t; | ||
91 | |||
92 | #define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) | ||
93 | |||
94 | #define XFS_AGF_MAGICNUM 0x00000001 | ||
95 | #define XFS_AGF_VERSIONNUM 0x00000002 | ||
96 | #define XFS_AGF_SEQNO 0x00000004 | ||
97 | #define XFS_AGF_LENGTH 0x00000008 | ||
98 | #define XFS_AGF_ROOTS 0x00000010 | ||
99 | #define XFS_AGF_LEVELS 0x00000020 | ||
100 | #define XFS_AGF_FLFIRST 0x00000040 | ||
101 | #define XFS_AGF_FLLAST 0x00000080 | ||
102 | #define XFS_AGF_FLCOUNT 0x00000100 | ||
103 | #define XFS_AGF_FREEBLKS 0x00000200 | ||
104 | #define XFS_AGF_LONGEST 0x00000400 | ||
105 | #define XFS_AGF_BTREEBLKS 0x00000800 | ||
106 | #define XFS_AGF_UUID 0x00001000 | ||
107 | #define XFS_AGF_NUM_BITS 13 | ||
108 | #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) | ||
109 | |||
110 | #define XFS_AGF_FLAGS \ | ||
111 | { XFS_AGF_MAGICNUM, "MAGICNUM" }, \ | ||
112 | { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \ | ||
113 | { XFS_AGF_SEQNO, "SEQNO" }, \ | ||
114 | { XFS_AGF_LENGTH, "LENGTH" }, \ | ||
115 | { XFS_AGF_ROOTS, "ROOTS" }, \ | ||
116 | { XFS_AGF_LEVELS, "LEVELS" }, \ | ||
117 | { XFS_AGF_FLFIRST, "FLFIRST" }, \ | ||
118 | { XFS_AGF_FLLAST, "FLLAST" }, \ | ||
119 | { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ | ||
120 | { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ | ||
121 | { XFS_AGF_LONGEST, "LONGEST" }, \ | ||
122 | { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ | ||
123 | { XFS_AGF_UUID, "UUID" } | ||
124 | |||
125 | /* disk block (xfs_daddr_t) in the AG */ | ||
126 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | ||
127 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) | ||
128 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) | ||
129 | |||
130 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | ||
131 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | ||
132 | |||
133 | /* | ||
134 | * Size of the unlinked inode hash table in the agi. | ||
135 | */ | ||
136 | #define XFS_AGI_UNLINKED_BUCKETS 64 | ||
137 | |||
138 | typedef struct xfs_agi { | ||
139 | /* | ||
140 | * Common allocation group header information | ||
141 | */ | ||
142 | __be32 agi_magicnum; /* magic number == XFS_AGI_MAGIC */ | ||
143 | __be32 agi_versionnum; /* header version == XFS_AGI_VERSION */ | ||
144 | __be32 agi_seqno; /* sequence # starting from 0 */ | ||
145 | __be32 agi_length; /* size in blocks of a.g. */ | ||
146 | /* | ||
147 | * Inode information | ||
148 | * Inodes are mapped by interpreting the inode number, so no | ||
149 | * mapping data is needed here. | ||
150 | */ | ||
151 | __be32 agi_count; /* count of allocated inodes */ | ||
152 | __be32 agi_root; /* root of inode btree */ | ||
153 | __be32 agi_level; /* levels in inode btree */ | ||
154 | __be32 agi_freecount; /* number of free inodes */ | ||
155 | |||
156 | __be32 agi_newino; /* new inode just allocated */ | ||
157 | __be32 agi_dirino; /* last directory inode chunk */ | ||
158 | /* | ||
159 | * Hash table of inodes which have been unlinked but are | ||
160 | * still being referenced. | ||
161 | */ | ||
162 | __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; | ||
163 | /* | ||
164 | * This marks the end of logging region 1 and start of logging region 2. | ||
165 | */ | ||
166 | uuid_t agi_uuid; /* uuid of filesystem */ | ||
167 | __be32 agi_crc; /* crc of agi sector */ | ||
168 | __be32 agi_pad32; | ||
169 | __be64 agi_lsn; /* last write sequence */ | ||
170 | |||
171 | __be32 agi_free_root; /* root of the free inode btree */ | ||
172 | __be32 agi_free_level;/* levels in free inode btree */ | ||
173 | |||
174 | /* structure must be padded to 64 bit alignment */ | ||
175 | } xfs_agi_t; | ||
176 | |||
177 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) | ||
178 | |||
179 | #define XFS_AGI_MAGICNUM (1 << 0) | ||
180 | #define XFS_AGI_VERSIONNUM (1 << 1) | ||
181 | #define XFS_AGI_SEQNO (1 << 2) | ||
182 | #define XFS_AGI_LENGTH (1 << 3) | ||
183 | #define XFS_AGI_COUNT (1 << 4) | ||
184 | #define XFS_AGI_ROOT (1 << 5) | ||
185 | #define XFS_AGI_LEVEL (1 << 6) | ||
186 | #define XFS_AGI_FREECOUNT (1 << 7) | ||
187 | #define XFS_AGI_NEWINO (1 << 8) | ||
188 | #define XFS_AGI_DIRINO (1 << 9) | ||
189 | #define XFS_AGI_UNLINKED (1 << 10) | ||
190 | #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ | ||
191 | #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) | ||
192 | #define XFS_AGI_FREE_ROOT (1 << 11) | ||
193 | #define XFS_AGI_FREE_LEVEL (1 << 12) | ||
194 | #define XFS_AGI_NUM_BITS_R2 13 | ||
195 | |||
196 | /* disk block (xfs_daddr_t) in the AG */ | ||
197 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | ||
198 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) | ||
199 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) | ||
200 | |||
201 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | ||
202 | xfs_agnumber_t agno, struct xfs_buf **bpp); | ||
203 | |||
204 | /* | ||
205 | * The third a.g. block contains the a.g. freelist, an array | ||
206 | * of block pointers to blocks owned by the allocation btree code. | ||
207 | */ | ||
208 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) | ||
209 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) | ||
210 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) | ||
211 | |||
212 | #define XFS_BUF_TO_AGFL_BNO(mp, bp) \ | ||
213 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
214 | &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ | ||
215 | (__be32 *)(bp)->b_addr) | ||
216 | |||
217 | /* | ||
218 | * Size of the AGFL. For CRC-enabled filesystes we steal a couple of | ||
219 | * slots in the beginning of the block for a proper header with the | ||
220 | * location information and CRC. | ||
221 | */ | ||
222 | #define XFS_AGFL_SIZE(mp) \ | ||
223 | (((mp)->m_sb.sb_sectsize - \ | ||
224 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
225 | sizeof(struct xfs_agfl) : 0)) / \ | ||
226 | sizeof(xfs_agblock_t)) | ||
227 | |||
228 | typedef struct xfs_agfl { | ||
229 | __be32 agfl_magicnum; | ||
230 | __be32 agfl_seqno; | ||
231 | uuid_t agfl_uuid; | ||
232 | __be64 agfl_lsn; | ||
233 | __be32 agfl_crc; | ||
234 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ | ||
235 | } xfs_agfl_t; | ||
236 | |||
237 | #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) | ||
238 | |||
239 | /* | ||
240 | * tags for inode radix tree | ||
241 | */ | ||
242 | #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup | ||
243 | in xfs_inode_ag_iterator */ | ||
244 | #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ | ||
245 | #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ | ||
246 | |||
247 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) | ||
248 | #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ | ||
249 | (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) | ||
250 | #define XFS_MIN_FREELIST(a,mp) \ | ||
251 | (XFS_MIN_FREELIST_RAW( \ | ||
252 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \ | ||
253 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) | ||
254 | #define XFS_MIN_FREELIST_PAG(pag,mp) \ | ||
255 | (XFS_MIN_FREELIST_RAW( \ | ||
256 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ | ||
257 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) | ||
258 | |||
259 | #define XFS_AGB_TO_FSB(mp,agno,agbno) \ | ||
260 | (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) | ||
261 | #define XFS_FSB_TO_AGNO(mp,fsbno) \ | ||
262 | ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) | ||
263 | #define XFS_FSB_TO_AGBNO(mp,fsbno) \ | ||
264 | ((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog))) | ||
265 | #define XFS_AGB_TO_DADDR(mp,agno,agbno) \ | ||
266 | ((xfs_daddr_t)XFS_FSB_TO_BB(mp, \ | ||
267 | (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))) | ||
268 | #define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d)) | ||
269 | |||
270 | /* | ||
271 | * For checking for bad ranges of xfs_daddr_t's, covering multiple | ||
272 | * allocation groups or a single xfs_daddr_t that's a superblock copy. | ||
273 | */ | ||
274 | #define XFS_AG_CHECK_DADDR(mp,d,len) \ | ||
275 | ((len) == 1 ? \ | ||
276 | ASSERT((d) == XFS_SB_DADDR || \ | ||
277 | xfs_daddr_to_agbno(mp, d) != XFS_SB_DADDR) : \ | ||
278 | ASSERT(xfs_daddr_to_agno(mp, d) == \ | ||
279 | xfs_daddr_to_agno(mp, (d) + (len) - 1))) | ||
280 | |||
281 | #endif /* __XFS_AG_H__ */ | ||
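The xfs_ag.h header is deleted outright, and the long run of hunks that follows mostly just drops now-unneeded #include lines ("xfs_ag.h", "xfs_sb.h", "xfs_dinode.h", "xfs_inum.h") from the libxfs sources; the xfs_read_agf() prototype, for example, reappears in xfs_alloc.h below. Presumably the on-disk AGF/AGI/AGFL definitions were folded into another shared format header outside this diff. As a worked example of the address math the deleted macros encoded (values invented for illustration): with sb_agblklog = 16, AG 3 block 100 maps to file-system block (3 << 16) | 100 = 196708, and the reverse conversions shift and mask it back apart:

    /* Illustration of XFS_AGB_TO_FSB / XFS_FSB_TO_AGNO / XFS_FSB_TO_AGBNO
     * with an assumed sb_agblklog of 16. */
    xfs_fsblock_t  fsb   = ((xfs_fsblock_t)3 << 16) | 100;   /* 196708 */
    xfs_agnumber_t agno  = fsb >> 16;                         /* 3   */
    xfs_agblock_t  agbno = fsb & ((1u << 16) - 1);            /* 100 */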
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index eff34218f405..a6fbf4472017 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
29 | #include "xfs_btree.h" | 28 | #include "xfs_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index feacb061bab7..d1b4b6a5c894 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h | |||
@@ -231,4 +231,7 @@ xfs_alloc_get_rec( | |||
231 | xfs_extlen_t *len, /* output: length of extent */ | 231 | xfs_extlen_t *len, /* output: length of extent */ |
232 | int *stat); /* output: success/failure */ | 232 | int *stat); /* output: success/failure */ |
233 | 233 | ||
234 | int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | ||
235 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | ||
236 | |||
234 | #endif /* __XFS_ALLOC_H__ */ | 237 | #endif /* __XFS_ALLOC_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index e0e83e24d3ef..59d521c09a17 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
27 | #include "xfs_btree.h" | 26 | #include "xfs_btree.h" |
28 | #include "xfs_alloc_btree.h" | 27 | #include "xfs_alloc_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 353fb425faef..0a472fbe06d4 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
@@ -42,7 +40,6 @@ | |||
42 | #include "xfs_quota.h" | 40 | #include "xfs_quota.h" |
43 | #include "xfs_trans_space.h" | 41 | #include "xfs_trans_space.h" |
44 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
45 | #include "xfs_dinode.h" | ||
46 | 43 | ||
47 | /* | 44 | /* |
48 | * xfs_attr.c | 45 | * xfs_attr.c |
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index b1f73dbbf3d8..5d38e8b8a913 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 28 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 29 | #include "xfs_da_btree.h" |
@@ -41,7 +40,6 @@ | |||
41 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
42 | #include "xfs_buf_item.h" | 41 | #include "xfs_buf_item.h" |
43 | #include "xfs_cksum.h" | 42 | #include "xfs_cksum.h" |
44 | #include "xfs_dinode.h" | ||
45 | #include "xfs_dir2.h" | 43 | #include "xfs_dir2.h" |
46 | 44 | ||
47 | 45 | ||
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 7510ab8058a4..20de88d1bf86 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 28 | #include "xfs_da_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 79c981984dca..b5eb4743f75a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -22,9 +22,7 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 28 | #include "xfs_da_btree.h" |
@@ -46,7 +44,6 @@ | |||
46 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
47 | #include "xfs_symlink.h" | 45 | #include "xfs_symlink.h" |
48 | #include "xfs_attr_leaf.h" | 46 | #include "xfs_attr_leaf.h" |
49 | #include "xfs_dinode.h" | ||
50 | #include "xfs_filestream.h" | 47 | #include "xfs_filestream.h" |
51 | 48 | ||
52 | 49 | ||
@@ -5450,13 +5447,11 @@ xfs_bmse_merge( | |||
5450 | struct xfs_btree_cur *cur, | 5447 | struct xfs_btree_cur *cur, |
5451 | int *logflags) /* output */ | 5448 | int *logflags) /* output */ |
5452 | { | 5449 | { |
5453 | struct xfs_ifork *ifp; | ||
5454 | struct xfs_bmbt_irec got; | 5450 | struct xfs_bmbt_irec got; |
5455 | struct xfs_bmbt_irec left; | 5451 | struct xfs_bmbt_irec left; |
5456 | xfs_filblks_t blockcount; | 5452 | xfs_filblks_t blockcount; |
5457 | int error, i; | 5453 | int error, i; |
5458 | 5454 | ||
5459 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5460 | xfs_bmbt_get_all(gotp, &got); | 5455 | xfs_bmbt_get_all(gotp, &got); |
5461 | xfs_bmbt_get_all(leftp, &left); | 5456 | xfs_bmbt_get_all(leftp, &left); |
5462 | blockcount = left.br_blockcount + got.br_blockcount; | 5457 | blockcount = left.br_blockcount + got.br_blockcount; |
@@ -5489,32 +5484,25 @@ xfs_bmse_merge( | |||
5489 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, | 5484 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, |
5490 | got.br_blockcount, &i); | 5485 | got.br_blockcount, &i); |
5491 | if (error) | 5486 | if (error) |
5492 | goto out_error; | 5487 | return error; |
5493 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | 5488 | XFS_WANT_CORRUPTED_RETURN(i == 1); |
5494 | 5489 | ||
5495 | error = xfs_btree_delete(cur, &i); | 5490 | error = xfs_btree_delete(cur, &i); |
5496 | if (error) | 5491 | if (error) |
5497 | goto out_error; | 5492 | return error; |
5498 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | 5493 | XFS_WANT_CORRUPTED_RETURN(i == 1); |
5499 | 5494 | ||
5500 | /* lookup and update size of the previous extent */ | 5495 | /* lookup and update size of the previous extent */ |
5501 | error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, | 5496 | error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, |
5502 | left.br_blockcount, &i); | 5497 | left.br_blockcount, &i); |
5503 | if (error) | 5498 | if (error) |
5504 | goto out_error; | 5499 | return error; |
5505 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | 5500 | XFS_WANT_CORRUPTED_RETURN(i == 1); |
5506 | 5501 | ||
5507 | left.br_blockcount = blockcount; | 5502 | left.br_blockcount = blockcount; |
5508 | 5503 | ||
5509 | error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, | 5504 | return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, |
5510 | left.br_blockcount, left.br_state); | 5505 | left.br_blockcount, left.br_state); |
5511 | if (error) | ||
5512 | goto out_error; | ||
5513 | |||
5514 | return 0; | ||
5515 | |||
5516 | out_error: | ||
5517 | return error; | ||
5518 | } | 5506 | } |
5519 | 5507 | ||
5520 | /* | 5508 | /* |
@@ -5544,35 +5532,29 @@ xfs_bmse_shift_one( | |||
5544 | startoff = got.br_startoff - offset_shift_fsb; | 5532 | startoff = got.br_startoff - offset_shift_fsb; |
5545 | 5533 | ||
5546 | /* delalloc extents should be prevented by caller */ | 5534 | /* delalloc extents should be prevented by caller */ |
5547 | XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), | 5535 | XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); |
5548 | out_error); | ||
5549 | 5536 | ||
5550 | /* | 5537 | /* |
5551 | * If this is the first extent in the file, make sure there's enough | 5538 | * Check for merge if we've got an extent to the left, otherwise make |
5552 | * room at the start of the file and jump right to the shift as there's | 5539 | * sure there's enough room at the start of the file for the shift. |
5553 | * no left extent to merge. | ||
5554 | */ | 5540 | */ |
5555 | if (*current_ext == 0) { | 5541 | if (*current_ext) { |
5556 | if (got.br_startoff < offset_shift_fsb) | 5542 | /* grab the left extent and check for a large enough hole */ |
5557 | return -EINVAL; | 5543 | leftp = xfs_iext_get_ext(ifp, *current_ext - 1); |
5558 | goto shift_extent; | 5544 | xfs_bmbt_get_all(leftp, &left); |
5559 | } | ||
5560 | 5545 | ||
5561 | /* grab the left extent and check for a large enough hole */ | 5546 | if (startoff < left.br_startoff + left.br_blockcount) |
5562 | leftp = xfs_iext_get_ext(ifp, *current_ext - 1); | 5547 | return -EINVAL; |
5563 | xfs_bmbt_get_all(leftp, &left); | ||
5564 | 5548 | ||
5565 | if (startoff < left.br_startoff + left.br_blockcount) | 5549 | /* check whether to merge the extent or shift it down */ |
5550 | if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { | ||
5551 | return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, | ||
5552 | *current_ext, gotp, leftp, cur, | ||
5553 | logflags); | ||
5554 | } | ||
5555 | } else if (got.br_startoff < offset_shift_fsb) | ||
5566 | return -EINVAL; | 5556 | return -EINVAL; |
5567 | 5557 | ||
5568 | /* check whether to merge the extent or shift it down */ | ||
5569 | if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) | ||
5570 | goto shift_extent; | ||
5571 | |||
5572 | return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, | ||
5573 | gotp, leftp, cur, logflags); | ||
5574 | |||
5575 | shift_extent: | ||
5576 | /* | 5558 | /* |
5577 | * Increment the extent index for the next iteration, update the start | 5559 | * Increment the extent index for the next iteration, update the start |
5578 | * offset of the in-core extent and update the btree if applicable. | 5560 | * offset of the in-core extent and update the btree if applicable. |
@@ -5589,18 +5571,11 @@ shift_extent: | |||
5589 | got.br_blockcount, &i); | 5571 | got.br_blockcount, &i); |
5590 | if (error) | 5572 | if (error) |
5591 | return error; | 5573 | return error; |
5592 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); | 5574 | XFS_WANT_CORRUPTED_RETURN(i == 1); |
5593 | 5575 | ||
5594 | got.br_startoff = startoff; | 5576 | got.br_startoff = startoff; |
5595 | error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, | 5577 | return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, |
5596 | got.br_blockcount, got.br_state); | 5578 | got.br_blockcount, got.br_state); |
5597 | if (error) | ||
5598 | return error; | ||
5599 | |||
5600 | return 0; | ||
5601 | |||
5602 | out_error: | ||
5603 | return error; | ||
5604 | } | 5579 | } |
5605 | 5580 | ||
5606 | /* | 5581 | /* |
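The hunk above converts xfs_bmse_shift_one() from XFS_WANT_CORRUPTED_GOTO plus an out_error label to XFS_WANT_CORRUPTED_RETURN, so the single-exit boilerplate can be dropped. As a rough sketch (the authoritative definitions live in fs/xfs/xfs_error.h; the exact report strings and the reliance on a local "error" variable here are approximations, not copied from this patch), the two macros expand to roughly:

	/* Approximate expansion of the corruption-check macros used above. */
	#define XFS_WANT_CORRUPTED_GOTO(x, l)					\
		{								\
			int fs_is_ok = (x);					\
			ASSERT(fs_is_ok);					\
			if (unlikely(!fs_is_ok)) {				\
				XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO",	\
						 XFS_ERRLEVEL_LOW, NULL);	\
				error = -EFSCORRUPTED;	/* needs a local */	\
				goto l;						\
			}							\
		}

	#define XFS_WANT_CORRUPTED_RETURN(x)					\
		{								\
			int fs_is_ok = (x);					\
			ASSERT(fs_is_ok);					\
			if (unlikely(!fs_is_ok)) {				\
				XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN",	\
						 XFS_ERRLEVEL_LOW, NULL);	\
				return -EFSCORRUPTED;				\
			}							\
		}

Because the RETURN variant bails out of the function directly, the shift_extent/out_error labels and the trailing "return error" plumbing removed in the two hunks above become dead code.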
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index fba753308f31..2c44c8e50782 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_trans.h" | 27 | #include "xfs_trans.h" |
@@ -36,7 +34,6 @@ | |||
36 | #include "xfs_quota.h" | 34 | #include "xfs_quota.h" |
37 | #include "xfs_trace.h" | 35 | #include "xfs_trace.h" |
38 | #include "xfs_cksum.h" | 36 | #include "xfs_cksum.h" |
39 | #include "xfs_dinode.h" | ||
40 | 37 | ||
41 | /* | 38 | /* |
42 | * Determine the extent state. | 39 | * Determine the extent state. |
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 8fe6a93ff473..81cad433df85 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_trans.h" | 27 | #include "xfs_trans.h" |
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index fd827530afec..9cb0115c6bd1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 28 | #include "xfs_da_btree.h" |
@@ -514,7 +512,6 @@ xfs_da3_root_split( | |||
514 | struct xfs_buf *bp; | 512 | struct xfs_buf *bp; |
515 | struct xfs_inode *dp; | 513 | struct xfs_inode *dp; |
516 | struct xfs_trans *tp; | 514 | struct xfs_trans *tp; |
517 | struct xfs_mount *mp; | ||
518 | struct xfs_dir2_leaf *leaf; | 515 | struct xfs_dir2_leaf *leaf; |
519 | xfs_dablk_t blkno; | 516 | xfs_dablk_t blkno; |
520 | int level; | 517 | int level; |
@@ -534,7 +531,6 @@ xfs_da3_root_split( | |||
534 | 531 | ||
535 | dp = args->dp; | 532 | dp = args->dp; |
536 | tp = args->trans; | 533 | tp = args->trans; |
537 | mp = state->mp; | ||
538 | error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); | 534 | error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); |
539 | if (error) | 535 | if (error) |
540 | return error; | 536 | return error; |
@@ -2342,14 +2338,12 @@ xfs_da_shrink_inode( | |||
2342 | xfs_inode_t *dp; | 2338 | xfs_inode_t *dp; |
2343 | int done, error, w, count; | 2339 | int done, error, w, count; |
2344 | xfs_trans_t *tp; | 2340 | xfs_trans_t *tp; |
2345 | xfs_mount_t *mp; | ||
2346 | 2341 | ||
2347 | trace_xfs_da_shrink_inode(args); | 2342 | trace_xfs_da_shrink_inode(args); |
2348 | 2343 | ||
2349 | dp = args->dp; | 2344 | dp = args->dp; |
2350 | w = args->whichfork; | 2345 | w = args->whichfork; |
2351 | tp = args->trans; | 2346 | tp = args->trans; |
2352 | mp = dp->i_mount; | ||
2353 | count = args->geo->fsbcount; | 2347 | count = args->geo->fsbcount; |
2354 | for (;;) { | 2348 | for (;;) { |
2355 | /* | 2349 | /* |
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index 7e42fdfd2f1d..9d624a622946 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_format.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h deleted file mode 100644 index 623bbe8fd921..000000000000 --- a/fs/xfs/libxfs/xfs_dinode.h +++ /dev/null | |||
@@ -1,243 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_DINODE_H__ | ||
19 | #define __XFS_DINODE_H__ | ||
20 | |||
21 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ | ||
22 | #define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) | ||
23 | |||
24 | typedef struct xfs_timestamp { | ||
25 | __be32 t_sec; /* timestamp seconds */ | ||
26 | __be32 t_nsec; /* timestamp nanoseconds */ | ||
27 | } xfs_timestamp_t; | ||
28 | |||
29 | /* | ||
30 | * On-disk inode structure. | ||
31 | * | ||
32 | * This is just the header or "dinode core", the inode is expanded to fill a | ||
33 | * variable size, with the leftover area split into a data and an attribute fork. | ||
34 | * The format of the data and attribute fork depends on the format of the | ||
35 | * inode as indicated by di_format and di_aformat. To access the data and | ||
36 | * attribute forks, use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros | ||
37 | * below. | ||
38 | * | ||
39 | * There is a very similar struct icdinode in xfs_inode which matches the | ||
40 | * layout of the first 96 bytes of this structure, but is kept in native | ||
41 | * format instead of big endian. | ||
42 | * | ||
43 | * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed | ||
44 | * padding field for v3 inodes. | ||
45 | */ | ||
46 | typedef struct xfs_dinode { | ||
47 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | ||
48 | __be16 di_mode; /* mode and type of file */ | ||
49 | __u8 di_version; /* inode version */ | ||
50 | __u8 di_format; /* format of di_c data */ | ||
51 | __be16 di_onlink; /* old number of links to file */ | ||
52 | __be32 di_uid; /* owner's user id */ | ||
53 | __be32 di_gid; /* owner's group id */ | ||
54 | __be32 di_nlink; /* number of links to file */ | ||
55 | __be16 di_projid_lo; /* lower part of owner's project id */ | ||
56 | __be16		di_projid_hi;	/* higher part of owner's project id */ | ||
57 | __u8 di_pad[6]; /* unused, zeroed space */ | ||
58 | __be16 di_flushiter; /* incremented on flush */ | ||
59 | xfs_timestamp_t di_atime; /* time last accessed */ | ||
60 | xfs_timestamp_t di_mtime; /* time last modified */ | ||
61 | xfs_timestamp_t di_ctime; /* time created/inode modified */ | ||
62 | __be64 di_size; /* number of bytes in file */ | ||
63 | __be64 di_nblocks; /* # of direct & btree blocks used */ | ||
64 | __be32 di_extsize; /* basic/minimum extent size for file */ | ||
65 | __be32 di_nextents; /* number of extents in data fork */ | ||
66 | __be16 di_anextents; /* number of extents in attribute fork*/ | ||
67 | __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ | ||
68 | __s8 di_aformat; /* format of attr fork's data */ | ||
69 | __be32 di_dmevmask; /* DMIG event mask */ | ||
70 | __be16 di_dmstate; /* DMIG state info */ | ||
71 | __be16 di_flags; /* random flags, XFS_DIFLAG_... */ | ||
72 | __be32 di_gen; /* generation number */ | ||
73 | |||
74 | /* di_next_unlinked is the only non-core field in the old dinode */ | ||
75 | __be32 di_next_unlinked;/* agi unlinked list ptr */ | ||
76 | |||
77 | /* start of the extended dinode, writable fields */ | ||
78 | __le32 di_crc; /* CRC of the inode */ | ||
79 | __be64 di_changecount; /* number of attribute changes */ | ||
80 | __be64 di_lsn; /* flush sequence */ | ||
81 | __be64 di_flags2; /* more random flags */ | ||
82 | __u8 di_pad2[16]; /* more padding for future expansion */ | ||
83 | |||
84 | /* fields only written to during inode creation */ | ||
85 | xfs_timestamp_t di_crtime; /* time created */ | ||
86 | __be64 di_ino; /* inode number */ | ||
87 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
88 | |||
89 | /* structure must be padded to 64 bit alignment */ | ||
90 | } xfs_dinode_t; | ||
91 | |||
92 | #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) | ||
93 | |||
94 | #define DI_MAX_FLUSH 0xffff | ||
95 | |||
96 | /* | ||
97 | * Size of the core inode on disk. Version 1 and 2 inodes have | ||
98 | * the same size, but version 3 has grown a few additional fields. | ||
99 | */ | ||
100 | static inline uint xfs_dinode_size(int version) | ||
101 | { | ||
102 | if (version == 3) | ||
103 | return sizeof(struct xfs_dinode); | ||
104 | return offsetof(struct xfs_dinode, di_crc); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. | ||
109 | * Since the pathconf interface is signed, we use 2^31 - 1 instead. | ||
110 | * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. | ||
111 | */ | ||
112 | #define XFS_MAXLINK ((1U << 31) - 1U) | ||
113 | #define XFS_MAXLINK_1 65535U | ||
114 | |||
115 | /* | ||
116 | * Values for di_format | ||
117 | */ | ||
118 | typedef enum xfs_dinode_fmt { | ||
119 | XFS_DINODE_FMT_DEV, /* xfs_dev_t */ | ||
120 | XFS_DINODE_FMT_LOCAL, /* bulk data */ | ||
121 | XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ | ||
122 | XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ | ||
123 | XFS_DINODE_FMT_UUID /* uuid_t */ | ||
124 | } xfs_dinode_fmt_t; | ||
125 | |||
126 | /* | ||
127 | * Inode minimum and maximum sizes. | ||
128 | */ | ||
129 | #define XFS_DINODE_MIN_LOG 8 | ||
130 | #define XFS_DINODE_MAX_LOG 11 | ||
131 | #define XFS_DINODE_MIN_SIZE (1 << XFS_DINODE_MIN_LOG) | ||
132 | #define XFS_DINODE_MAX_SIZE (1 << XFS_DINODE_MAX_LOG) | ||
133 | |||
134 | /* | ||
135 | * Inode size for given fs. | ||
136 | */ | ||
137 | #define XFS_LITINO(mp, version) \ | ||
138 | ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) | ||
139 | |||
140 | /* | ||
141 | * Inode data & attribute fork sizes, per inode. | ||
142 | */ | ||
143 | #define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0) | ||
144 | #define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) | ||
145 | |||
146 | #define XFS_DFORK_DSIZE(dip,mp) \ | ||
147 | (XFS_DFORK_Q(dip) ? \ | ||
148 | XFS_DFORK_BOFF(dip) : \ | ||
149 | XFS_LITINO(mp, (dip)->di_version)) | ||
150 | #define XFS_DFORK_ASIZE(dip,mp) \ | ||
151 | (XFS_DFORK_Q(dip) ? \ | ||
152 | XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \ | ||
153 | 0) | ||
154 | #define XFS_DFORK_SIZE(dip,mp,w) \ | ||
155 | ((w) == XFS_DATA_FORK ? \ | ||
156 | XFS_DFORK_DSIZE(dip, mp) : \ | ||
157 | XFS_DFORK_ASIZE(dip, mp)) | ||
158 | |||
159 | /* | ||
160 | * Return pointers to the data or attribute forks. | ||
161 | */ | ||
162 | #define XFS_DFORK_DPTR(dip) \ | ||
163 | ((char *)dip + xfs_dinode_size(dip->di_version)) | ||
164 | #define XFS_DFORK_APTR(dip) \ | ||
165 | (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) | ||
166 | #define XFS_DFORK_PTR(dip,w) \ | ||
167 | ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) | ||
168 | |||
169 | #define XFS_DFORK_FORMAT(dip,w) \ | ||
170 | ((w) == XFS_DATA_FORK ? \ | ||
171 | (dip)->di_format : \ | ||
172 | (dip)->di_aformat) | ||
173 | #define XFS_DFORK_NEXTENTS(dip,w) \ | ||
174 | ((w) == XFS_DATA_FORK ? \ | ||
175 | be32_to_cpu((dip)->di_nextents) : \ | ||
176 | be16_to_cpu((dip)->di_anextents)) | ||
177 | |||
178 | #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr)) | ||
179 | |||
180 | /* | ||
181 | * For block and character special files the 32bit dev_t is stored at the | ||
182 | * beginning of the data fork. | ||
183 | */ | ||
184 | static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip) | ||
185 | { | ||
186 | return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip)); | ||
187 | } | ||
188 | |||
189 | static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) | ||
190 | { | ||
191 | *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev); | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * Values for di_flags | ||
196 | * There should be a one-to-one correspondence between these flags and the | ||
197 | * XFS_XFLAG_s. | ||
198 | */ | ||
199 | #define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */ | ||
200 | #define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */ | ||
201 | #define XFS_DIFLAG_NEWRTBM_BIT 2 /* for rtbitmap inode, new format */ | ||
202 | #define XFS_DIFLAG_IMMUTABLE_BIT 3 /* inode is immutable */ | ||
203 | #define XFS_DIFLAG_APPEND_BIT 4 /* inode is append-only */ | ||
204 | #define XFS_DIFLAG_SYNC_BIT 5 /* inode is written synchronously */ | ||
205 | #define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */ | ||
206 | #define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */ | ||
207 | #define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ | ||
208 | #define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ | ||
209 | #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ | ||
210 | #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ | ||
211 | #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ | ||
212 | #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ | ||
213 | #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ | ||
214 | #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) | ||
215 | #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) | ||
216 | #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) | ||
217 | #define XFS_DIFLAG_IMMUTABLE (1 << XFS_DIFLAG_IMMUTABLE_BIT) | ||
218 | #define XFS_DIFLAG_APPEND (1 << XFS_DIFLAG_APPEND_BIT) | ||
219 | #define XFS_DIFLAG_SYNC (1 << XFS_DIFLAG_SYNC_BIT) | ||
220 | #define XFS_DIFLAG_NOATIME (1 << XFS_DIFLAG_NOATIME_BIT) | ||
221 | #define XFS_DIFLAG_NODUMP (1 << XFS_DIFLAG_NODUMP_BIT) | ||
222 | #define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) | ||
223 | #define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) | ||
224 | #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) | ||
225 | #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) | ||
226 | #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) | ||
227 | #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) | ||
228 | #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) | ||
229 | |||
230 | #ifdef CONFIG_XFS_RT | ||
231 | #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) | ||
232 | #else | ||
233 | #define XFS_IS_REALTIME_INODE(ip) (0) | ||
234 | #endif | ||
235 | |||
236 | #define XFS_DIFLAG_ANY \ | ||
237 | (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ | ||
238 | XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ | ||
239 | XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ | ||
240 | XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ | ||
241 | XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM) | ||
242 | |||
243 | #endif /* __XFS_DINODE_H__ */ | ||
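The deletion above moves the on-disk inode definitions out of xfs_dinode.h (they are consolidated into xfs_format.h); the fork macros (XFS_LITINO, XFS_DFORK_BOFF/DSIZE/ASIZE) carve the per-inode literal area into a data fork and an optional attribute fork. A small user-space sketch of that arithmetic, assuming a 512-byte inode, a 176-byte v3 dinode core and di_forkoff == 32 (all example values, not taken from this patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned int inodesize = 512;	/* sb_inodesize (example)         */
		unsigned int coresize  = 176;	/* sizeof(struct xfs_dinode), v3  */
		unsigned int forkoff   = 32;	/* di_forkoff, in 8-byte units    */

		unsigned int litino = inodesize - coresize;	     /* XFS_LITINO()      */
		unsigned int dboff  = forkoff << 3;		     /* XFS_DFORK_BOFF()  */
		unsigned int dsize  = forkoff ? dboff : litino;	     /* XFS_DFORK_DSIZE() */
		unsigned int asize  = forkoff ? litino - dboff : 0;  /* XFS_DFORK_ASIZE() */

		printf("literal area %u bytes: data fork %u, attr fork %u\n",
		       litino, dsize, asize);
		return 0;
	}

With these example values the literal area is 336 bytes, split 256/80 between the data and attribute forks.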
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 7075aaf131f4..a69fb3a1e161 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c | |||
@@ -20,9 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 24 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 25 | #include "xfs_da_btree.h" |
@@ -34,10 +31,25 @@ | |||
34 | #include "xfs_dir2_priv.h" | 31 | #include "xfs_dir2_priv.h" |
35 | #include "xfs_error.h" | 32 | #include "xfs_error.h" |
36 | #include "xfs_trace.h" | 33 | #include "xfs_trace.h" |
37 | #include "xfs_dinode.h" | ||
38 | 34 | ||
39 | struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; | 35 | struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; |
40 | 36 | ||
37 | /* | ||
38 | * @mode, if set, indicates that the type field needs to be set up. | ||
39 | * This uses the transformation from file mode to DT_* as defined in linux/fs.h | ||
40 | * for file type specification. This will be propagated into the directory | ||
41 | * structure if appropriate for the given operation and filesystem config. | ||
42 | */ | ||
43 | const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { | ||
44 | [0] = XFS_DIR3_FT_UNKNOWN, | ||
45 | [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, | ||
46 | [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, | ||
47 | [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, | ||
48 | [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, | ||
49 | [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, | ||
50 | [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, | ||
51 | [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, | ||
52 | }; | ||
41 | 53 | ||
42 | /* | 54 | /* |
43 | * ASCII case-insensitive (ie. A-Z) support for directories that was | 55 | * ASCII case-insensitive (ie. A-Z) support for directories that was |
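The xfs_mode_to_ftype[] table added above (moved here from xfs_dir2_priv.h along with S_SHIFT) is indexed by the S_IFMT bits of a file mode shifted down by S_SHIFT. A user-space illustration of the lookup, using the <sys/stat.h> constants (the kernel side uses the equivalent definitions from linux/stat.h):

	#include <stdio.h>
	#include <sys/stat.h>

	#define S_SHIFT 12

	int main(void)
	{
		mode_t mode = S_IFDIR | 0755;
		unsigned int idx = (mode & S_IFMT) >> S_SHIFT;

		/* idx == 4 here, i.e. the S_IFDIR slot, which the table
		 * maps to XFS_DIR3_FT_DIR. */
		printf("table index for a directory: %u\n", idx);
		return 0;
	}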
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 4dff261e6ed5..e55353651f5b 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h | |||
@@ -32,6 +32,12 @@ struct xfs_dir2_data_unused; | |||
32 | extern struct xfs_name xfs_name_dotdot; | 32 | extern struct xfs_name xfs_name_dotdot; |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * directory filetype conversion tables. | ||
36 | */ | ||
37 | #define S_SHIFT 12 | ||
38 | extern const unsigned char xfs_mode_to_ftype[]; | ||
39 | |||
40 | /* | ||
35 | * directory operations vector for encode/decode routines | 41 | * directory operations vector for encode/decode routines |
36 | */ | 42 | */ |
37 | struct xfs_dir_ops { | 43 | struct xfs_dir_ops { |
@@ -177,4 +183,138 @@ extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops; | |||
177 | extern const struct xfs_buf_ops xfs_dir3_free_buf_ops; | 183 | extern const struct xfs_buf_ops xfs_dir3_free_buf_ops; |
178 | extern const struct xfs_buf_ops xfs_dir3_data_buf_ops; | 184 | extern const struct xfs_buf_ops xfs_dir3_data_buf_ops; |
179 | 185 | ||
186 | /* | ||
187 | * Directory offset/block conversion functions. | ||
188 | * | ||
189 | * DB blocks here are logical directory block numbers, not filesystem blocks. | ||
190 | */ | ||
191 | |||
192 | /* | ||
193 | * Convert dataptr to byte in file space | ||
194 | */ | ||
195 | static inline xfs_dir2_off_t | ||
196 | xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) | ||
197 | { | ||
198 | return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * Convert byte in file space to dataptr. It had better be aligned. | ||
203 | */ | ||
204 | static inline xfs_dir2_dataptr_t | ||
205 | xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) | ||
206 | { | ||
207 | return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Convert byte in space to (DB) block | ||
212 | */ | ||
213 | static inline xfs_dir2_db_t | ||
214 | xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
215 | { | ||
216 | return (xfs_dir2_db_t)(by >> geo->blklog); | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Convert dataptr to a block number | ||
221 | */ | ||
222 | static inline xfs_dir2_db_t | ||
223 | xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
224 | { | ||
225 | return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Convert byte in space to offset in a block | ||
230 | */ | ||
231 | static inline xfs_dir2_data_aoff_t | ||
232 | xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
233 | { | ||
234 | return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Convert dataptr to a byte offset in a block | ||
239 | */ | ||
240 | static inline xfs_dir2_data_aoff_t | ||
241 | xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
242 | { | ||
243 | return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Convert block and offset to byte in space | ||
248 | */ | ||
249 | static inline xfs_dir2_off_t | ||
250 | xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
251 | xfs_dir2_data_aoff_t o) | ||
252 | { | ||
253 | return ((xfs_dir2_off_t)db << geo->blklog) + o; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Convert block (DB) to block (dablk) | ||
258 | */ | ||
259 | static inline xfs_dablk_t | ||
260 | xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
261 | { | ||
262 | return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Convert byte in space to (DA) block | ||
267 | */ | ||
268 | static inline xfs_dablk_t | ||
269 | xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
270 | { | ||
271 | return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Convert block and offset to dataptr | ||
276 | */ | ||
277 | static inline xfs_dir2_dataptr_t | ||
278 | xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
279 | xfs_dir2_data_aoff_t o) | ||
280 | { | ||
281 | return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * Convert block (dablk) to block (DB) | ||
286 | */ | ||
287 | static inline xfs_dir2_db_t | ||
288 | xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
289 | { | ||
290 | return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * Convert block (dablk) to byte offset in space | ||
295 | */ | ||
296 | static inline xfs_dir2_off_t | ||
297 | xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
298 | { | ||
299 | return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * Directory tail pointer accessor functions. Based on block geometry. | ||
304 | */ | ||
305 | static inline struct xfs_dir2_block_tail * | ||
306 | xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) | ||
307 | { | ||
308 | return ((struct xfs_dir2_block_tail *) | ||
309 | ((char *)hdr + geo->blksize)) - 1; | ||
310 | } | ||
311 | |||
312 | static inline struct xfs_dir2_leaf_tail * | ||
313 | xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) | ||
314 | { | ||
315 | return (struct xfs_dir2_leaf_tail *) | ||
316 | ((char *)lp + geo->blksize - | ||
317 | sizeof(struct xfs_dir2_leaf_tail)); | ||
318 | } | ||
319 | |||
180 | #endif /* __XFS_DIR2_H__ */ | 320 | #endif /* __XFS_DIR2_H__ */ |
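The conversion helpers moved into xfs_dir2.h above all reduce to shifts and masks on the directory geometry. A rough user-space sketch of unpacking a dataptr into a logical directory block and an intra-block offset, assuming 4096-byte directory blocks (geo->blklog == 12) and 8-byte data-entry alignment (XFS_DIR2_DATA_ALIGN_LOG == 3); the dataptr value is made up for the example:

	#include <stdio.h>
	#include <stdint.h>

	#define XFS_DIR2_DATA_ALIGN_LOG	3	/* 8-byte aligned data entries */

	int main(void)
	{
		unsigned int blklog = 12;	/* log2 of directory block size */
		uint32_t dataptr = 0x207;	/* example packed dataptr       */

		uint64_t byte = (uint64_t)dataptr << XFS_DIR2_DATA_ALIGN_LOG;
		uint32_t db   = (uint32_t)(byte >> blklog);		   /* xfs_dir2_byte_to_db()  */
		uint32_t off  = (uint32_t)(byte & ((1u << blklog) - 1));  /* xfs_dir2_byte_to_off() */

		printf("dataptr 0x%x -> dir block %u, offset %u\n", dataptr, db, off);
		return 0;
	}

For this example the dataptr expands to byte 4152, i.e. directory block 1 at offset 56, mirroring what xfs_dir2_dataptr_to_db() and xfs_dir2_dataptr_to_off() compute from the same geometry.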
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 9628ceccfa02..9354e190b82e 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 26 | #include "xfs_da_btree.h" |
@@ -36,7 +34,6 @@ | |||
36 | #include "xfs_error.h" | 34 | #include "xfs_error.h" |
37 | #include "xfs_trace.h" | 35 | #include "xfs_trace.h" |
38 | #include "xfs_cksum.h" | 36 | #include "xfs_cksum.h" |
39 | #include "xfs_dinode.h" | ||
40 | 37 | ||
41 | /* | 38 | /* |
42 | * Local function prototypes. | 39 | * Local function prototypes. |
@@ -353,7 +350,6 @@ xfs_dir2_block_addname( | |||
353 | int low; /* low index for binary srch */ | 350 | int low; /* low index for binary srch */ |
354 | int lowstale; /* low stale index */ | 351 | int lowstale; /* low stale index */ |
355 | int mid=0; /* midpoint for binary srch */ | 352 | int mid=0; /* midpoint for binary srch */ |
356 | xfs_mount_t *mp; /* filesystem mount point */ | ||
357 | int needlog; /* need to log header */ | 353 | int needlog; /* need to log header */ |
358 | int needscan; /* need to rescan freespace */ | 354 | int needscan; /* need to rescan freespace */ |
359 | __be16 *tagp; /* pointer to tag value */ | 355 | __be16 *tagp; /* pointer to tag value */ |
@@ -363,7 +359,6 @@ xfs_dir2_block_addname( | |||
363 | 359 | ||
364 | dp = args->dp; | 360 | dp = args->dp; |
365 | tp = args->trans; | 361 | tp = args->trans; |
366 | mp = dp->i_mount; | ||
367 | 362 | ||
368 | /* Read the (one and only) directory block into bp. */ | 363 | /* Read the (one and only) directory block into bp. */ |
369 | error = xfs_dir3_block_read(tp, dp, &bp); | 364 | error = xfs_dir3_block_read(tp, dp, &bp); |
@@ -618,7 +613,6 @@ xfs_dir2_block_lookup( | |||
618 | xfs_inode_t *dp; /* incore inode */ | 613 | xfs_inode_t *dp; /* incore inode */ |
619 | int ent; /* entry index */ | 614 | int ent; /* entry index */ |
620 | int error; /* error return value */ | 615 | int error; /* error return value */ |
621 | xfs_mount_t *mp; /* filesystem mount point */ | ||
622 | 616 | ||
623 | trace_xfs_dir2_block_lookup(args); | 617 | trace_xfs_dir2_block_lookup(args); |
624 | 618 | ||
@@ -629,7 +623,6 @@ xfs_dir2_block_lookup( | |||
629 | if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) | 623 | if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) |
630 | return error; | 624 | return error; |
631 | dp = args->dp; | 625 | dp = args->dp; |
632 | mp = dp->i_mount; | ||
633 | hdr = bp->b_addr; | 626 | hdr = bp->b_addr; |
634 | xfs_dir3_data_check(dp, bp); | 627 | xfs_dir3_data_check(dp, bp); |
635 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | 628 | btp = xfs_dir2_block_tail_p(args->geo, hdr); |
@@ -770,7 +763,6 @@ xfs_dir2_block_removename( | |||
770 | xfs_inode_t *dp; /* incore inode */ | 763 | xfs_inode_t *dp; /* incore inode */ |
771 | int ent; /* block leaf entry index */ | 764 | int ent; /* block leaf entry index */ |
772 | int error; /* error return value */ | 765 | int error; /* error return value */ |
773 | xfs_mount_t *mp; /* filesystem mount point */ | ||
774 | int needlog; /* need to log block header */ | 766 | int needlog; /* need to log block header */ |
775 | int needscan; /* need to fixup bestfree */ | 767 | int needscan; /* need to fixup bestfree */ |
776 | xfs_dir2_sf_hdr_t sfh; /* shortform header */ | 768 | xfs_dir2_sf_hdr_t sfh; /* shortform header */ |
@@ -788,7 +780,6 @@ xfs_dir2_block_removename( | |||
788 | } | 780 | } |
789 | dp = args->dp; | 781 | dp = args->dp; |
790 | tp = args->trans; | 782 | tp = args->trans; |
791 | mp = dp->i_mount; | ||
792 | hdr = bp->b_addr; | 783 | hdr = bp->b_addr; |
793 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | 784 | btp = xfs_dir2_block_tail_p(args->geo, hdr); |
794 | blp = xfs_dir2_block_leaf_p(btp); | 785 | blp = xfs_dir2_block_leaf_p(btp); |
@@ -852,7 +843,6 @@ xfs_dir2_block_replace( | |||
852 | xfs_inode_t *dp; /* incore inode */ | 843 | xfs_inode_t *dp; /* incore inode */ |
853 | int ent; /* leaf entry index */ | 844 | int ent; /* leaf entry index */ |
854 | int error; /* error return value */ | 845 | int error; /* error return value */ |
855 | xfs_mount_t *mp; /* filesystem mount point */ | ||
856 | 846 | ||
857 | trace_xfs_dir2_block_replace(args); | 847 | trace_xfs_dir2_block_replace(args); |
858 | 848 | ||
@@ -864,7 +854,6 @@ xfs_dir2_block_replace( | |||
864 | return error; | 854 | return error; |
865 | } | 855 | } |
866 | dp = args->dp; | 856 | dp = args->dp; |
867 | mp = dp->i_mount; | ||
868 | hdr = bp->b_addr; | 857 | hdr = bp->b_addr; |
869 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | 858 | btp = xfs_dir2_block_tail_p(args->geo, hdr); |
870 | blp = xfs_dir2_block_leaf_p(btp); | 859 | blp = xfs_dir2_block_leaf_p(btp); |
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index fdd803fecb8e..5ff31be9b1cd 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 26 | #include "xfs_da_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a19174eb3cb2..106119955400 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 26 | #include "xfs_da_btree.h" |
@@ -384,7 +382,6 @@ xfs_dir2_block_to_leaf( | |||
384 | xfs_dir2_db_t ldb; /* leaf block's bno */ | 382 | xfs_dir2_db_t ldb; /* leaf block's bno */ |
385 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | 383 | xfs_dir2_leaf_t *leaf; /* leaf structure */ |
386 | xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ | 384 | xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ |
387 | xfs_mount_t *mp; /* filesystem mount point */ | ||
388 | int needlog; /* need to log block header */ | 385 | int needlog; /* need to log block header */ |
389 | int needscan; /* need to rescan bestfree */ | 386 | int needscan; /* need to rescan bestfree */ |
390 | xfs_trans_t *tp; /* transaction pointer */ | 387 | xfs_trans_t *tp; /* transaction pointer */ |
@@ -395,7 +392,6 @@ xfs_dir2_block_to_leaf( | |||
395 | trace_xfs_dir2_block_to_leaf(args); | 392 | trace_xfs_dir2_block_to_leaf(args); |
396 | 393 | ||
397 | dp = args->dp; | 394 | dp = args->dp; |
398 | mp = dp->i_mount; | ||
399 | tp = args->trans; | 395 | tp = args->trans; |
400 | /* | 396 | /* |
401 | * Add the leaf block to the inode. | 397 | * Add the leaf block to the inode. |
@@ -626,7 +622,6 @@ xfs_dir2_leaf_addname( | |||
626 | int lfloghigh; /* high leaf logging index */ | 622 | int lfloghigh; /* high leaf logging index */ |
627 | int lowstale; /* index of prev stale leaf */ | 623 | int lowstale; /* index of prev stale leaf */ |
628 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ | 624 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ |
629 | xfs_mount_t *mp; /* filesystem mount point */ | ||
630 | int needbytes; /* leaf block bytes needed */ | 625 | int needbytes; /* leaf block bytes needed */ |
631 | int needlog; /* need to log data header */ | 626 | int needlog; /* need to log data header */ |
632 | int needscan; /* need to rescan data free */ | 627 | int needscan; /* need to rescan data free */ |
@@ -641,7 +636,6 @@ xfs_dir2_leaf_addname( | |||
641 | 636 | ||
642 | dp = args->dp; | 637 | dp = args->dp; |
643 | tp = args->trans; | 638 | tp = args->trans; |
644 | mp = dp->i_mount; | ||
645 | 639 | ||
646 | error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); | 640 | error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); |
647 | if (error) | 641 | if (error) |
@@ -1356,11 +1350,9 @@ xfs_dir2_leaf_removename( | |||
1356 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | 1350 | xfs_dir2_leaf_t *leaf; /* leaf structure */ |
1357 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | 1351 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ |
1358 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | 1352 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ |
1359 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1360 | int needlog; /* need to log data header */ | 1353 | int needlog; /* need to log data header */ |
1361 | int needscan; /* need to rescan data frees */ | 1354 | int needscan; /* need to rescan data frees */ |
1362 | xfs_dir2_data_off_t oldbest; /* old value of best free */ | 1355 | xfs_dir2_data_off_t oldbest; /* old value of best free */ |
1363 | xfs_trans_t *tp; /* transaction pointer */ | ||
1364 | struct xfs_dir2_data_free *bf; /* bestfree table */ | 1356 | struct xfs_dir2_data_free *bf; /* bestfree table */ |
1365 | struct xfs_dir2_leaf_entry *ents; | 1357 | struct xfs_dir2_leaf_entry *ents; |
1366 | struct xfs_dir3_icleaf_hdr leafhdr; | 1358 | struct xfs_dir3_icleaf_hdr leafhdr; |
@@ -1374,8 +1366,6 @@ xfs_dir2_leaf_removename( | |||
1374 | return error; | 1366 | return error; |
1375 | } | 1367 | } |
1376 | dp = args->dp; | 1368 | dp = args->dp; |
1377 | tp = args->trans; | ||
1378 | mp = dp->i_mount; | ||
1379 | leaf = lbp->b_addr; | 1369 | leaf = lbp->b_addr; |
1380 | hdr = dbp->b_addr; | 1370 | hdr = dbp->b_addr; |
1381 | xfs_dir3_data_check(dp, dbp); | 1371 | xfs_dir3_data_check(dp, dbp); |
@@ -1607,11 +1597,9 @@ xfs_dir2_leaf_trim_data( | |||
1607 | int error; /* error return value */ | 1597 | int error; /* error return value */ |
1608 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | 1598 | xfs_dir2_leaf_t *leaf; /* leaf structure */ |
1609 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | 1599 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ |
1610 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1611 | xfs_trans_t *tp; /* transaction pointer */ | 1600 | xfs_trans_t *tp; /* transaction pointer */ |
1612 | 1601 | ||
1613 | dp = args->dp; | 1602 | dp = args->dp; |
1614 | mp = dp->i_mount; | ||
1615 | tp = args->trans; | 1603 | tp = args->trans; |
1616 | /* | 1604 | /* |
1617 | * Read the offending data block. We need its buffer. | 1605 | * Read the offending data block. We need its buffer. |
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 2ae6ac2c11ae..41b80d3d3877 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 26 | #include "xfs_da_btree.h" |
@@ -297,7 +295,6 @@ xfs_dir2_leaf_to_node( | |||
297 | int i; /* leaf freespace index */ | 295 | int i; /* leaf freespace index */ |
298 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | 296 | xfs_dir2_leaf_t *leaf; /* leaf structure */ |
299 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | 297 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ |
300 | xfs_mount_t *mp; /* filesystem mount point */ | ||
301 | int n; /* count of live freespc ents */ | 298 | int n; /* count of live freespc ents */ |
302 | xfs_dir2_data_off_t off; /* freespace entry value */ | 299 | xfs_dir2_data_off_t off; /* freespace entry value */ |
303 | __be16 *to; /* pointer to freespace entry */ | 300 | __be16 *to; /* pointer to freespace entry */ |
@@ -307,7 +304,6 @@ xfs_dir2_leaf_to_node( | |||
307 | trace_xfs_dir2_leaf_to_node(args); | 304 | trace_xfs_dir2_leaf_to_node(args); |
308 | 305 | ||
309 | dp = args->dp; | 306 | dp = args->dp; |
310 | mp = dp->i_mount; | ||
311 | tp = args->trans; | 307 | tp = args->trans; |
312 | /* | 308 | /* |
313 | * Add a freespace block to the directory. | 309 | * Add a freespace block to the directory. |
@@ -387,16 +383,12 @@ xfs_dir2_leafn_add( | |||
387 | int lfloghigh; /* high leaf entry logging */ | 383 | int lfloghigh; /* high leaf entry logging */ |
388 | int lfloglow; /* low leaf entry logging */ | 384 | int lfloglow; /* low leaf entry logging */ |
389 | int lowstale; /* previous stale entry */ | 385 | int lowstale; /* previous stale entry */ |
390 | xfs_mount_t *mp; /* filesystem mount point */ | ||
391 | xfs_trans_t *tp; /* transaction pointer */ | ||
392 | struct xfs_dir3_icleaf_hdr leafhdr; | 386 | struct xfs_dir3_icleaf_hdr leafhdr; |
393 | struct xfs_dir2_leaf_entry *ents; | 387 | struct xfs_dir2_leaf_entry *ents; |
394 | 388 | ||
395 | trace_xfs_dir2_leafn_add(args, index); | 389 | trace_xfs_dir2_leafn_add(args, index); |
396 | 390 | ||
397 | dp = args->dp; | 391 | dp = args->dp; |
398 | mp = dp->i_mount; | ||
399 | tp = args->trans; | ||
400 | leaf = bp->b_addr; | 392 | leaf = bp->b_addr; |
401 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | 393 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); |
402 | ents = dp->d_ops->leaf_ents_p(leaf); | 394 | ents = dp->d_ops->leaf_ents_p(leaf); |
@@ -1170,7 +1162,6 @@ xfs_dir2_leafn_remove( | |||
1170 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | 1162 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ |
1171 | int longest; /* longest data free entry */ | 1163 | int longest; /* longest data free entry */ |
1172 | int off; /* data block entry offset */ | 1164 | int off; /* data block entry offset */ |
1173 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1174 | int needlog; /* need to log data header */ | 1165 | int needlog; /* need to log data header */ |
1175 | int needscan; /* need to rescan data frees */ | 1166 | int needscan; /* need to rescan data frees */ |
1176 | xfs_trans_t *tp; /* transaction pointer */ | 1167 | xfs_trans_t *tp; /* transaction pointer */ |
@@ -1182,7 +1173,6 @@ xfs_dir2_leafn_remove( | |||
1182 | 1173 | ||
1183 | dp = args->dp; | 1174 | dp = args->dp; |
1184 | tp = args->trans; | 1175 | tp = args->trans; |
1185 | mp = dp->i_mount; | ||
1186 | leaf = bp->b_addr; | 1176 | leaf = bp->b_addr; |
1187 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | 1177 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); |
1188 | ents = dp->d_ops->leaf_ents_p(leaf); | 1178 | ents = dp->d_ops->leaf_ents_p(leaf); |
@@ -1323,7 +1313,6 @@ xfs_dir2_leafn_split( | |||
1323 | xfs_da_args_t *args; /* operation arguments */ | 1313 | xfs_da_args_t *args; /* operation arguments */ |
1324 | xfs_dablk_t blkno; /* new leaf block number */ | 1314 | xfs_dablk_t blkno; /* new leaf block number */ |
1325 | int error; /* error return value */ | 1315 | int error; /* error return value */ |
1326 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1327 | struct xfs_inode *dp; | 1316 | struct xfs_inode *dp; |
1328 | 1317 | ||
1329 | /* | 1318 | /* |
@@ -1331,7 +1320,6 @@ xfs_dir2_leafn_split( | |||
1331 | */ | 1320 | */ |
1332 | args = state->args; | 1321 | args = state->args; |
1333 | dp = args->dp; | 1322 | dp = args->dp; |
1334 | mp = dp->i_mount; | ||
1335 | ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); | 1323 | ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); |
1336 | error = xfs_da_grow_inode(args, &blkno); | 1324 | error = xfs_da_grow_inode(args, &blkno); |
1337 | if (error) { | 1325 | if (error) { |
@@ -2231,12 +2219,10 @@ xfs_dir2_node_trim_free( | |||
2231 | xfs_inode_t *dp; /* incore directory inode */ | 2219 | xfs_inode_t *dp; /* incore directory inode */ |
2232 | int error; /* error return code */ | 2220 | int error; /* error return code */ |
2233 | xfs_dir2_free_t *free; /* freespace structure */ | 2221 | xfs_dir2_free_t *free; /* freespace structure */ |
2234 | xfs_mount_t *mp; /* filesystem mount point */ | ||
2235 | xfs_trans_t *tp; /* transaction pointer */ | 2222 | xfs_trans_t *tp; /* transaction pointer */ |
2236 | struct xfs_dir3_icfree_hdr freehdr; | 2223 | struct xfs_dir3_icfree_hdr freehdr; |
2237 | 2224 | ||
2238 | dp = args->dp; | 2225 | dp = args->dp; |
2239 | mp = dp->i_mount; | ||
2240 | tp = args->trans; | 2226 | tp = args->trans; |
2241 | /* | 2227 | /* |
2242 | * Read the freespace block. | 2228 | * Read the freespace block. |
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 27ce0794d196..ef9f6ead96a4 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h | |||
@@ -20,140 +20,6 @@ | |||
20 | 20 | ||
21 | struct dir_context; | 21 | struct dir_context; |
22 | 22 | ||
23 | /* | ||
24 | * Directory offset/block conversion functions. | ||
25 | * | ||
26 | * DB blocks here are logical directory block numbers, not filesystem blocks. | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * Convert dataptr to byte in file space | ||
31 | */ | ||
32 | static inline xfs_dir2_off_t | ||
33 | xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) | ||
34 | { | ||
35 | return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Convert byte in file space to dataptr. It had better be aligned. | ||
40 | */ | ||
41 | static inline xfs_dir2_dataptr_t | ||
42 | xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) | ||
43 | { | ||
44 | return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * Convert byte in space to (DB) block | ||
49 | */ | ||
50 | static inline xfs_dir2_db_t | ||
51 | xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
52 | { | ||
53 | return (xfs_dir2_db_t)(by >> geo->blklog); | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Convert dataptr to a block number | ||
58 | */ | ||
59 | static inline xfs_dir2_db_t | ||
60 | xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
61 | { | ||
62 | return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Convert byte in space to offset in a block | ||
67 | */ | ||
68 | static inline xfs_dir2_data_aoff_t | ||
69 | xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
70 | { | ||
71 | return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Convert dataptr to a byte offset in a block | ||
76 | */ | ||
77 | static inline xfs_dir2_data_aoff_t | ||
78 | xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
79 | { | ||
80 | return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Convert block and offset to byte in space | ||
85 | */ | ||
86 | static inline xfs_dir2_off_t | ||
87 | xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
88 | xfs_dir2_data_aoff_t o) | ||
89 | { | ||
90 | return ((xfs_dir2_off_t)db << geo->blklog) + o; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Convert block (DB) to block (dablk) | ||
95 | */ | ||
96 | static inline xfs_dablk_t | ||
97 | xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
98 | { | ||
99 | return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Convert byte in space to (DA) block | ||
104 | */ | ||
105 | static inline xfs_dablk_t | ||
106 | xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
107 | { | ||
108 | return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Convert block and offset to dataptr | ||
113 | */ | ||
114 | static inline xfs_dir2_dataptr_t | ||
115 | xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
116 | xfs_dir2_data_aoff_t o) | ||
117 | { | ||
118 | return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Convert block (dablk) to block (DB) | ||
123 | */ | ||
124 | static inline xfs_dir2_db_t | ||
125 | xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
126 | { | ||
127 | return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Convert block (dablk) to byte offset in space | ||
132 | */ | ||
133 | static inline xfs_dir2_off_t | ||
134 | xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
135 | { | ||
136 | return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Directory tail pointer accessor functions. Based on block geometry. | ||
141 | */ | ||
142 | static inline struct xfs_dir2_block_tail * | ||
143 | xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) | ||
144 | { | ||
145 | return ((struct xfs_dir2_block_tail *) | ||
146 | ((char *)hdr + geo->blksize)) - 1; | ||
147 | } | ||
148 | |||
149 | static inline struct xfs_dir2_leaf_tail * | ||
150 | xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) | ||
151 | { | ||
152 | return (struct xfs_dir2_leaf_tail *) | ||
153 | ((char *)lp + geo->blksize - | ||
154 | sizeof(struct xfs_dir2_leaf_tail)); | ||
155 | } | ||
156 | |||
157 | /* xfs_dir2.c */ | 23 | /* xfs_dir2.c */ |
158 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); | 24 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); |
159 | extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, | 25 | extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, |
@@ -161,12 +27,6 @@ extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, | |||
161 | extern int xfs_dir_cilookup_result(struct xfs_da_args *args, | 27 | extern int xfs_dir_cilookup_result(struct xfs_da_args *args, |
162 | const unsigned char *name, int len); | 28 | const unsigned char *name, int len); |
163 | 29 | ||
164 | #define S_SHIFT 12 | ||
165 | extern const unsigned char xfs_mode_to_ftype[]; | ||
166 | |||
167 | extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, | ||
168 | __uint8_t filetype); | ||
169 | |||
170 | 30 | ||
171 | /* xfs_dir2_block.c */ | 31 | /* xfs_dir2_block.c */ |
172 | extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, | 32 | extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, |
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 5079e051ef08..974d62e677f4 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_da_format.h" | 24 | #include "xfs_da_format.h" |
27 | #include "xfs_da_btree.h" | 25 | #include "xfs_da_btree.h" |
@@ -32,7 +30,6 @@ | |||
32 | #include "xfs_dir2.h" | 30 | #include "xfs_dir2.h" |
33 | #include "xfs_dir2_priv.h" | 31 | #include "xfs_dir2_priv.h" |
34 | #include "xfs_trace.h" | 32 | #include "xfs_trace.h" |
35 | #include "xfs_dinode.h" | ||
36 | 33 | ||
37 | /* | 34 | /* |
38 | * Prototypes for internal functions. | 35 | * Prototypes for internal functions. |
@@ -455,13 +452,11 @@ xfs_dir2_sf_addname_hard( | |||
455 | xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ | 452 | xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ |
456 | xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ | 453 | xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ |
457 | xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ | 454 | xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ |
458 | struct xfs_mount *mp; | ||
459 | 455 | ||
460 | /* | 456 | /* |
461 | * Copy the old directory to the stack buffer. | 457 | * Copy the old directory to the stack buffer. |
462 | */ | 458 | */ |
463 | dp = args->dp; | 459 | dp = args->dp; |
464 | mp = dp->i_mount; | ||
465 | 460 | ||
466 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 461 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
467 | old_isize = (int)dp->i_d.di_size; | 462 | old_isize = (int)dp->i_d.di_size; |
@@ -542,7 +537,6 @@ xfs_dir2_sf_addname_pick( | |||
542 | xfs_inode_t *dp; /* incore directory inode */ | 537 | xfs_inode_t *dp; /* incore directory inode */ |
543 | int holefit; /* found hole it will fit in */ | 538 | int holefit; /* found hole it will fit in */ |
544 | int i; /* entry number */ | 539 | int i; /* entry number */ |
545 | xfs_mount_t *mp; /* filesystem mount point */ | ||
546 | xfs_dir2_data_aoff_t offset; /* data block offset */ | 540 | xfs_dir2_data_aoff_t offset; /* data block offset */ |
547 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ | 541 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ |
548 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | 542 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ |
@@ -550,7 +544,6 @@ xfs_dir2_sf_addname_pick( | |||
550 | int used; /* data bytes used */ | 544 | int used; /* data bytes used */ |
551 | 545 | ||
552 | dp = args->dp; | 546 | dp = args->dp; |
553 | mp = dp->i_mount; | ||
554 | 547 | ||
555 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 548 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
556 | size = dp->d_ops->data_entsize(args->namelen); | 549 | size = dp->d_ops->data_entsize(args->namelen); |
@@ -616,10 +609,8 @@ xfs_dir2_sf_check( | |||
616 | int offset; /* data offset */ | 609 | int offset; /* data offset */ |
617 | xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ | 610 | xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ |
618 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | 611 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ |
619 | struct xfs_mount *mp; | ||
620 | 612 | ||
621 | dp = args->dp; | 613 | dp = args->dp; |
622 | mp = dp->i_mount; | ||
623 | 614 | ||
624 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 615 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; |
625 | offset = dp->d_ops->data_first_offset; | 616 | offset = dp->d_ops->data_first_offset; |
@@ -1016,12 +1007,10 @@ xfs_dir2_sf_toino4( | |||
1016 | int oldsize; /* old inode size */ | 1007 | int oldsize; /* old inode size */ |
1017 | xfs_dir2_sf_entry_t *sfep; /* new sf entry */ | 1008 | xfs_dir2_sf_entry_t *sfep; /* new sf entry */ |
1018 | xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ | 1009 | xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ |
1019 | struct xfs_mount *mp; | ||
1020 | 1010 | ||
1021 | trace_xfs_dir2_sf_toino4(args); | 1011 | trace_xfs_dir2_sf_toino4(args); |
1022 | 1012 | ||
1023 | dp = args->dp; | 1013 | dp = args->dp; |
1024 | mp = dp->i_mount; | ||
1025 | 1014 | ||
1026 | /* | 1015 | /* |
1027 | * Copy the old directory to the buffer. | 1016 | * Copy the old directory to the buffer. |
@@ -1094,12 +1083,10 @@ xfs_dir2_sf_toino8( | |||
1094 | int oldsize; /* old inode size */ | 1083 | int oldsize; /* old inode size */ |
1095 | xfs_dir2_sf_entry_t *sfep; /* new sf entry */ | 1084 | xfs_dir2_sf_entry_t *sfep; /* new sf entry */ |
1096 | xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ | 1085 | xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ |
1097 | struct xfs_mount *mp; | ||
1098 | 1086 | ||
1099 | trace_xfs_dir2_sf_toino8(args); | 1087 | trace_xfs_dir2_sf_toino8(args); |
1100 | 1088 | ||
1101 | dp = args->dp; | 1089 | dp = args->dp; |
1102 | mp = dp->i_mount; | ||
1103 | 1090 | ||
1104 | /* | 1091 | /* |
1105 | * Copy the old directory to the buffer. | 1092 | * Copy the old directory to the buffer. |
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index bb969337efc8..6fbf2d853a54 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_format.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_quota.h" | 27 | #include "xfs_quota.h" |
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 7e42bba9a420..fbd6da263571 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h | |||
@@ -34,6 +34,1077 @@ struct xfs_buf; | |||
34 | struct xfs_ifork; | 34 | struct xfs_ifork; |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Super block | ||
38 | * Fits into a sector-sized buffer at address 0 of each allocation group. | ||
39 | * Only the first of these is ever updated except during growfs. | ||
40 | */ | ||
41 | #define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */ | ||
42 | #define XFS_SB_VERSION_1 1 /* 5.3, 6.0.1, 6.1 */ | ||
43 | #define XFS_SB_VERSION_2 2 /* 6.2 - attributes */ | ||
44 | #define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */ | ||
45 | #define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */ | ||
46 | #define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */ | ||
47 | #define XFS_SB_VERSION_NUMBITS 0x000f | ||
48 | #define XFS_SB_VERSION_ALLFBITS 0xfff0 | ||
49 | #define XFS_SB_VERSION_ATTRBIT 0x0010 | ||
50 | #define XFS_SB_VERSION_NLINKBIT 0x0020 | ||
51 | #define XFS_SB_VERSION_QUOTABIT 0x0040 | ||
52 | #define XFS_SB_VERSION_ALIGNBIT 0x0080 | ||
53 | #define XFS_SB_VERSION_DALIGNBIT 0x0100 | ||
54 | #define XFS_SB_VERSION_SHAREDBIT 0x0200 | ||
55 | #define XFS_SB_VERSION_LOGV2BIT 0x0400 | ||
56 | #define XFS_SB_VERSION_SECTORBIT 0x0800 | ||
57 | #define XFS_SB_VERSION_EXTFLGBIT 0x1000 | ||
58 | #define XFS_SB_VERSION_DIRV2BIT 0x2000 | ||
59 | #define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */ | ||
60 | #define XFS_SB_VERSION_MOREBITSBIT 0x8000 | ||
61 | |||
62 | /* | ||
63 | * Supported feature bit list is just all bits in the versionnum field because | ||
64 | * we've used them all up and understand them all. Except, of course, for the | ||
65 | * shared superblock bit, which nobody knows what it does and so is unsupported. | ||
66 | */ | ||
67 | #define XFS_SB_VERSION_OKBITS \ | ||
68 | ((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \ | ||
69 | ~XFS_SB_VERSION_SHAREDBIT) | ||
70 | |||
71 | /* | ||
72 | * There are two words to hold XFS "feature" bits: the original | ||
73 | * word, sb_versionnum, and sb_features2. Whenever a bit is set in | ||
74 | * sb_features2, the feature bit XFS_SB_VERSION_MOREBITSBIT must be set. | ||
75 | * | ||
76 | * These defines represent bits in sb_features2. | ||
77 | */ | ||
78 | #define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 | ||
79 | #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ | ||
80 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 | ||
81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ | ||
82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ | ||
83 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ | ||
84 | #define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ | ||
85 | #define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */ | ||
86 | |||
87 | #define XFS_SB_VERSION2_OKBITS \ | ||
88 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ | ||
89 | XFS_SB_VERSION2_ATTR2BIT | \ | ||
90 | XFS_SB_VERSION2_PROJID32BIT | \ | ||
91 | XFS_SB_VERSION2_FTYPE) | ||
92 | |||
93 | /* | ||
94 | * Superblock - in core version. Must match the ondisk version below. | ||
95 | * Must be padded to 64 bit alignment. | ||
96 | */ | ||
97 | typedef struct xfs_sb { | ||
98 | __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ | ||
99 | __uint32_t sb_blocksize; /* logical block size, bytes */ | ||
100 | xfs_rfsblock_t sb_dblocks; /* number of data blocks */ | ||
101 | xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ | ||
102 | xfs_rtblock_t sb_rextents; /* number of realtime extents */ | ||
103 | uuid_t sb_uuid; /* file system unique id */ | ||
104 | xfs_fsblock_t sb_logstart; /* starting block of log if internal */ | ||
105 | xfs_ino_t sb_rootino; /* root inode number */ | ||
106 | xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ | ||
107 | xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ | ||
108 | xfs_agblock_t sb_rextsize; /* realtime extent size, blocks */ | ||
109 | xfs_agblock_t sb_agblocks; /* size of an allocation group */ | ||
110 | xfs_agnumber_t sb_agcount; /* number of allocation groups */ | ||
111 | xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */ | ||
112 | xfs_extlen_t sb_logblocks; /* number of log blocks */ | ||
113 | __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */ | ||
114 | __uint16_t sb_sectsize; /* volume sector size, bytes */ | ||
115 | __uint16_t sb_inodesize; /* inode size, bytes */ | ||
116 | __uint16_t sb_inopblock; /* inodes per block */ | ||
117 | char sb_fname[12]; /* file system name */ | ||
118 | __uint8_t sb_blocklog; /* log2 of sb_blocksize */ | ||
119 | __uint8_t sb_sectlog; /* log2 of sb_sectsize */ | ||
120 | __uint8_t sb_inodelog; /* log2 of sb_inodesize */ | ||
121 | __uint8_t sb_inopblog; /* log2 of sb_inopblock */ | ||
122 | __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */ | ||
123 | __uint8_t sb_rextslog; /* log2 of sb_rextents */ | ||
124 | __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */ | ||
125 | __uint8_t sb_imax_pct; /* max % of fs for inode space */ | ||
126 | /* statistics */ | ||
127 | /* | ||
128 | * These fields must remain contiguous. If you really | ||
129 | * want to change their layout, make sure you fix the | ||
130 | * code in xfs_trans_apply_sb_deltas(). | ||
131 | */ | ||
132 | __uint64_t sb_icount; /* allocated inodes */ | ||
133 | __uint64_t sb_ifree; /* free inodes */ | ||
134 | __uint64_t sb_fdblocks; /* free data blocks */ | ||
135 | __uint64_t sb_frextents; /* free realtime extents */ | ||
136 | /* | ||
137 | * End contiguous fields. | ||
138 | */ | ||
139 | xfs_ino_t sb_uquotino; /* user quota inode */ | ||
140 | xfs_ino_t sb_gquotino; /* group quota inode */ | ||
141 | __uint16_t sb_qflags; /* quota flags */ | ||
142 | __uint8_t sb_flags; /* misc. flags */ | ||
143 | __uint8_t sb_shared_vn; /* shared version number */ | ||
144 | xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */ | ||
145 | __uint32_t sb_unit; /* stripe or raid unit */ | ||
146 | __uint32_t sb_width; /* stripe or raid width */ | ||
147 | __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */ | ||
148 | __uint8_t sb_logsectlog; /* log2 of the log sector size */ | ||
149 | __uint16_t sb_logsectsize; /* sector size for the log, bytes */ | ||
150 | __uint32_t sb_logsunit; /* stripe unit size for the log */ | ||
151 | __uint32_t sb_features2; /* additional feature bits */ | ||
152 | |||
153 | /* | ||
154 | * bad features2 field as a result of failing to pad the sb | ||
155 | * structure to 64 bits. Some machines will be using this field | ||
156 | * for features2 bits. Easiest just to mark it bad and not use | ||
157 | * it for anything else. | ||
158 | */ | ||
159 | __uint32_t sb_bad_features2; | ||
160 | |||
161 | /* version 5 superblock fields start here */ | ||
162 | |||
163 | /* feature masks */ | ||
164 | __uint32_t sb_features_compat; | ||
165 | __uint32_t sb_features_ro_compat; | ||
166 | __uint32_t sb_features_incompat; | ||
167 | __uint32_t sb_features_log_incompat; | ||
168 | |||
169 | __uint32_t sb_crc; /* superblock crc */ | ||
170 | __uint32_t sb_pad; | ||
171 | |||
172 | xfs_ino_t sb_pquotino; /* project quota inode */ | ||
173 | xfs_lsn_t sb_lsn; /* last write sequence */ | ||
174 | |||
175 | /* must be padded to 64 bit alignment */ | ||
176 | } xfs_sb_t; | ||
177 | |||
178 | #define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) | ||
179 | |||
180 | /* | ||
181 | * Superblock - on disk version. Must match the in core version above. | ||
182 | * Must be padded to 64 bit alignment. | ||
183 | */ | ||
184 | typedef struct xfs_dsb { | ||
185 | __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ | ||
186 | __be32 sb_blocksize; /* logical block size, bytes */ | ||
187 | __be64 sb_dblocks; /* number of data blocks */ | ||
188 | __be64 sb_rblocks; /* number of realtime blocks */ | ||
189 | __be64 sb_rextents; /* number of realtime extents */ | ||
190 | uuid_t sb_uuid; /* file system unique id */ | ||
191 | __be64 sb_logstart; /* starting block of log if internal */ | ||
192 | __be64 sb_rootino; /* root inode number */ | ||
193 | __be64 sb_rbmino; /* bitmap inode for realtime extents */ | ||
194 | __be64 sb_rsumino; /* summary inode for rt bitmap */ | ||
195 | __be32 sb_rextsize; /* realtime extent size, blocks */ | ||
196 | __be32 sb_agblocks; /* size of an allocation group */ | ||
197 | __be32 sb_agcount; /* number of allocation groups */ | ||
198 | __be32 sb_rbmblocks; /* number of rt bitmap blocks */ | ||
199 | __be32 sb_logblocks; /* number of log blocks */ | ||
200 | __be16 sb_versionnum; /* header version == XFS_SB_VERSION */ | ||
201 | __be16 sb_sectsize; /* volume sector size, bytes */ | ||
202 | __be16 sb_inodesize; /* inode size, bytes */ | ||
203 | __be16 sb_inopblock; /* inodes per block */ | ||
204 | char sb_fname[12]; /* file system name */ | ||
205 | __u8 sb_blocklog; /* log2 of sb_blocksize */ | ||
206 | __u8 sb_sectlog; /* log2 of sb_sectsize */ | ||
207 | __u8 sb_inodelog; /* log2 of sb_inodesize */ | ||
208 | __u8 sb_inopblog; /* log2 of sb_inopblock */ | ||
209 | __u8 sb_agblklog; /* log2 of sb_agblocks (rounded up) */ | ||
210 | __u8 sb_rextslog; /* log2 of sb_rextents */ | ||
211 | __u8 sb_inprogress; /* mkfs is in progress, don't mount */ | ||
212 | __u8 sb_imax_pct; /* max % of fs for inode space */ | ||
213 | /* statistics */ | ||
214 | /* | ||
215 | * These fields must remain contiguous. If you really | ||
216 | * want to change their layout, make sure you fix the | ||
217 | * code in xfs_trans_apply_sb_deltas(). | ||
218 | */ | ||
219 | __be64 sb_icount; /* allocated inodes */ | ||
220 | __be64 sb_ifree; /* free inodes */ | ||
221 | __be64 sb_fdblocks; /* free data blocks */ | ||
222 | __be64 sb_frextents; /* free realtime extents */ | ||
223 | /* | ||
224 | * End contiguous fields. | ||
225 | */ | ||
226 | __be64 sb_uquotino; /* user quota inode */ | ||
227 | __be64 sb_gquotino; /* group quota inode */ | ||
228 | __be16 sb_qflags; /* quota flags */ | ||
229 | __u8 sb_flags; /* misc. flags */ | ||
230 | __u8 sb_shared_vn; /* shared version number */ | ||
231 | __be32 sb_inoalignmt; /* inode chunk alignment, fsblocks */ | ||
232 | __be32 sb_unit; /* stripe or raid unit */ | ||
233 | __be32 sb_width; /* stripe or raid width */ | ||
234 | __u8 sb_dirblklog; /* log2 of dir block size (fsbs) */ | ||
235 | __u8 sb_logsectlog; /* log2 of the log sector size */ | ||
236 | __be16 sb_logsectsize; /* sector size for the log, bytes */ | ||
237 | __be32 sb_logsunit; /* stripe unit size for the log */ | ||
238 | __be32 sb_features2; /* additional feature bits */ | ||
239 | /* | ||
240 | * bad features2 field as a result of failing to pad the sb | ||
241 | * structure to 64 bits. Some machines will be using this field | ||
242 | * for features2 bits. Easiest just to mark it bad and not use | ||
243 | * it for anything else. | ||
244 | */ | ||
245 | __be32 sb_bad_features2; | ||
246 | |||
247 | /* version 5 superblock fields start here */ | ||
248 | |||
249 | /* feature masks */ | ||
250 | __be32 sb_features_compat; | ||
251 | __be32 sb_features_ro_compat; | ||
252 | __be32 sb_features_incompat; | ||
253 | __be32 sb_features_log_incompat; | ||
254 | |||
255 | __le32 sb_crc; /* superblock crc */ | ||
256 | __be32 sb_pad; | ||
257 | |||
258 | __be64 sb_pquotino; /* project quota inode */ | ||
259 | __be64 sb_lsn; /* last write sequence */ | ||
260 | |||
261 | /* must be padded to 64 bit alignment */ | ||
262 | } xfs_dsb_t; | ||
263 | |||
264 | /* | ||
265 | * Sequence number values for the fields. | ||
266 | */ | ||
267 | typedef enum { | ||
268 | XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS, | ||
269 | XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO, | ||
270 | XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS, | ||
271 | XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS, | ||
272 | XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE, | ||
273 | XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG, | ||
274 | XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG, | ||
275 | XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT, | ||
276 | XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO, | ||
277 | XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, | ||
278 | XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, | ||
279 | XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, | ||
280 | XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT, | ||
281 | XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT, | ||
282 | XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD, | ||
283 | XFS_SBS_PQUOTINO, XFS_SBS_LSN, | ||
284 | XFS_SBS_FIELDCOUNT | ||
285 | } xfs_sb_field_t; | ||
286 | |||
287 | /* | ||
288 | * Mask values, defined based on the xfs_sb_field_t values. | ||
289 | * Only define the ones we're using. | ||
290 | */ | ||
291 | #define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x) | ||
292 | #define XFS_SB_UUID XFS_SB_MVAL(UUID) | ||
293 | #define XFS_SB_FNAME XFS_SB_MVAL(FNAME) | ||
294 | #define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO) | ||
295 | #define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO) | ||
296 | #define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO) | ||
297 | #define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM) | ||
298 | #define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO) | ||
299 | #define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO) | ||
300 | #define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS) | ||
301 | #define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) | ||
302 | #define XFS_SB_UNIT XFS_SB_MVAL(UNIT) | ||
303 | #define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) | ||
304 | #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) | ||
305 | #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) | ||
306 | #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) | ||
307 | #define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) | ||
308 | #define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) | ||
309 | #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) | ||
310 | #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) | ||
311 | #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) | ||
312 | #define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT) | ||
313 | #define XFS_SB_CRC XFS_SB_MVAL(CRC) | ||
314 | #define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO) | ||
315 | #define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) | ||
316 | #define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) | ||
317 | #define XFS_SB_MOD_BITS \ | ||
318 | (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ | ||
319 | XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ | ||
320 | XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ | ||
321 | XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ | ||
322 | XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ | ||
323 | XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ | ||
324 | XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) | ||
325 | |||
326 | |||
327 | /* | ||
328 | * Misc. Flags - warning - these will be cleared by xfs_repair unless | ||
329 | * a feature bit is set when the flag is used. | ||
330 | */ | ||
331 | #define XFS_SBF_NOFLAGS 0x00 /* no flags set */ | ||
332 | #define XFS_SBF_READONLY 0x01 /* only read-only mounts allowed */ | ||
333 | |||
334 | /* | ||
335 | * define max. shared version we can interoperate with | ||
336 | */ | ||
337 | #define XFS_SB_MAX_SHARED_VN 0 | ||
338 | |||
339 | #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) | ||
340 | |||
341 | /* | ||
342 | * The first XFS version we support is a v4 superblock with V2 directories. | ||
343 | */ | ||
344 | static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) | ||
345 | { | ||
346 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) | ||
347 | return false; | ||
348 | |||
349 | /* check for unknown features in the fs */ | ||
350 | if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || | ||
351 | ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && | ||
352 | (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) | ||
353 | return false; | ||
354 | |||
355 | return true; | ||
356 | } | ||
357 | |||
358 | static inline bool xfs_sb_good_version(struct xfs_sb *sbp) | ||
359 | { | ||
360 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) | ||
361 | return true; | ||
362 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) | ||
363 | return xfs_sb_good_v4_features(sbp); | ||
364 | return false; | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * Detect a mismatched features2 field. Older kernels read/wrote | ||
369 | * this into the wrong slot, so to be safe we keep them in sync. | ||
370 | */ | ||
371 | static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) | ||
372 | { | ||
373 | return sbp->sb_bad_features2 != sbp->sb_features2; | ||
374 | } | ||
375 | |||
376 | static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) | ||
377 | { | ||
378 | return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); | ||
379 | } | ||
380 | |||
381 | static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) | ||
382 | { | ||
383 | sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; | ||
384 | } | ||
385 | |||
386 | static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) | ||
387 | { | ||
388 | return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); | ||
389 | } | ||
390 | |||
391 | static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) | ||
392 | { | ||
393 | sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; | ||
394 | } | ||
395 | |||
396 | static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) | ||
397 | { | ||
398 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
399 | (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); | ||
400 | } | ||
401 | |||
402 | static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) | ||
403 | { | ||
404 | return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); | ||
405 | } | ||
406 | |||
407 | static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) | ||
408 | { | ||
409 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
410 | (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); | ||
411 | } | ||
412 | |||
413 | static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp) | ||
414 | { | ||
415 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
416 | (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); | ||
417 | } | ||
418 | |||
419 | static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) | ||
420 | { | ||
421 | return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); | ||
422 | } | ||
423 | |||
424 | static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) | ||
425 | { | ||
426 | return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); | ||
427 | } | ||
428 | |||
429 | static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) | ||
430 | { | ||
431 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
432 | (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * sb_features2 bit version macros. | ||
437 | */ | ||
438 | static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) | ||
439 | { | ||
440 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
441 | (xfs_sb_version_hasmorebits(sbp) && | ||
442 | (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); | ||
443 | } | ||
444 | |||
445 | static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) | ||
446 | { | ||
447 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
448 | (xfs_sb_version_hasmorebits(sbp) && | ||
449 | (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); | ||
450 | } | ||
451 | |||
452 | static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) | ||
453 | { | ||
454 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
455 | sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
456 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
457 | } | ||
458 | |||
459 | static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) | ||
460 | { | ||
461 | sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
462 | sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
463 | if (!sbp->sb_features2) | ||
464 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | ||
465 | } | ||
466 | |||
467 | static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) | ||
468 | { | ||
469 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
470 | (xfs_sb_version_hasmorebits(sbp) && | ||
471 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); | ||
472 | } | ||
473 | |||
474 | static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) | ||
475 | { | ||
476 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
477 | sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
478 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * Extended v5 superblock feature masks. These are to be used for new v5 | ||
483 | * superblock features only. | ||
484 | * | ||
485 | * Compat features are new features that old kernels will neither notice nor be | ||
486 | * affected by, so such kernels can still mount read-write without issues. | ||
487 | * | ||
488 | * RO-Compat (read only) are features that old kernels can read but will break | ||
489 | * if they write. Hence only read-only mounts of such filesystems are allowed on | ||
490 | * kernels that don't support the feature bit. | ||
491 | * | ||
492 | * InCompat features are features which old kernels will not understand, so | ||
493 | * such kernels must not mount the filesystem at all. | ||
494 | * | ||
495 | * Log-InCompat features are for changes to log formats or new transactions that | ||
496 | * can't be replayed on older kernels. The fields are set when the filesystem is | ||
497 | * mounted, and a clean unmount clears the fields. | ||
498 | */ | ||
499 | #define XFS_SB_FEAT_COMPAT_ALL 0 | ||
500 | #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL | ||
501 | static inline bool | ||
502 | xfs_sb_has_compat_feature( | ||
503 | struct xfs_sb *sbp, | ||
504 | __uint32_t feature) | ||
505 | { | ||
506 | return (sbp->sb_features_compat & feature) != 0; | ||
507 | } | ||
508 | |||
509 | #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ | ||
510 | #define XFS_SB_FEAT_RO_COMPAT_ALL \ | ||
511 | (XFS_SB_FEAT_RO_COMPAT_FINOBT) | ||
512 | #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL | ||
513 | static inline bool | ||
514 | xfs_sb_has_ro_compat_feature( | ||
515 | struct xfs_sb *sbp, | ||
516 | __uint32_t feature) | ||
517 | { | ||
518 | return (sbp->sb_features_ro_compat & feature) != 0; | ||
519 | } | ||
520 | |||
521 | #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ | ||
522 | #define XFS_SB_FEAT_INCOMPAT_ALL \ | ||
523 | (XFS_SB_FEAT_INCOMPAT_FTYPE) | ||
524 | |||
525 | #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL | ||
526 | static inline bool | ||
527 | xfs_sb_has_incompat_feature( | ||
528 | struct xfs_sb *sbp, | ||
529 | __uint32_t feature) | ||
530 | { | ||
531 | return (sbp->sb_features_incompat & feature) != 0; | ||
532 | } | ||
533 | |||
534 | #define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0 | ||
535 | #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL | ||
536 | static inline bool | ||
537 | xfs_sb_has_incompat_log_feature( | ||
538 | struct xfs_sb *sbp, | ||
539 | __uint32_t feature) | ||
540 | { | ||
541 | return (sbp->sb_features_log_incompat & feature) != 0; | ||
542 | } | ||
543 | |||
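Editor's sketch of how the policy described in the comment above might be applied at mount time (the helper name is hypothetical; the real mount code lives elsewhere): unknown incompat bits make the filesystem unmountable, unknown ro-compat bits restrict it to read-only mounts, and unknown compat bits are simply ignored.

    static int xfs_sb_check_v5_features(struct xfs_sb *sbp, bool readonly)
    {
        if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN))
            return -EINVAL;     /* cannot mount at all */
        if (!readonly &&
            xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN))
            return -EROFS;      /* read-only mounts only */
        /* Unknown compat bits are harmless and ignored; unknown
         * log-incompat bits only matter if log recovery is needed. */
        return 0;
    }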
544 | /* | ||
545 | * V5 superblock specific feature checks | ||
546 | */ | ||
547 | static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) | ||
548 | { | ||
549 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
550 | } | ||
551 | |||
552 | static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) | ||
553 | { | ||
554 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
555 | } | ||
556 | |||
557 | static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) | ||
558 | { | ||
559 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && | ||
560 | xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) || | ||
561 | (xfs_sb_version_hasmorebits(sbp) && | ||
562 | (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); | ||
563 | } | ||
564 | |||
565 | static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) | ||
566 | { | ||
567 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && | ||
568 | (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); | ||
569 | } | ||
570 | |||
571 | /* | ||
572 | * end of superblock version macros | ||
573 | */ | ||
574 | |||
575 | static inline bool | ||
576 | xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) | ||
577 | { | ||
578 | return (ino == sbp->sb_uquotino || | ||
579 | ino == sbp->sb_gquotino || | ||
580 | ino == sbp->sb_pquotino); | ||
581 | } | ||
582 | |||
583 | #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ | ||
584 | #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) | ||
585 | #define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) | ||
586 | |||
587 | #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) | ||
588 | #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ | ||
589 | xfs_daddr_to_agno(mp,d), xfs_daddr_to_agbno(mp,d)) | ||
590 | #define XFS_FSB_TO_DADDR(mp,fsbno) XFS_AGB_TO_DADDR(mp, \ | ||
591 | XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno)) | ||
592 | |||
593 | /* | ||
594 | * File system sector to basic block conversions. | ||
595 | */ | ||
596 | #define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log) | ||
597 | |||
598 | /* | ||
599 | * File system block to basic block conversions. | ||
600 | */ | ||
601 | #define XFS_FSB_TO_BB(mp,fsbno) ((fsbno) << (mp)->m_blkbb_log) | ||
602 | #define XFS_BB_TO_FSB(mp,bb) \ | ||
603 | (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) | ||
604 | #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) | ||
605 | |||
606 | /* | ||
607 | * File system block to byte conversions. | ||
608 | */ | ||
609 | #define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog) | ||
610 | #define XFS_B_TO_FSB(mp,b) \ | ||
611 | ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) | ||
612 | #define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) | ||
613 | #define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) | ||
614 | |||
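Editor's worked example (assumed geometry, not part of the patch): basic blocks are 512 bytes, so a filesystem with 4096-byte blocks has sb_blocklog = 12 and m_blkbb_log = 3, and the conversions above behave as follows.

    XFS_FSB_TO_BB(mp, 10);   /* 10 << 3 == 80 basic blocks */
    XFS_BB_TO_FSB(mp, 81);   /* rounds up: (81 + 7) >> 3 == 11 fs blocks */
    XFS_BB_TO_FSBT(mp, 81);  /* truncates: 81 >> 3 == 10 fs blocks */
    XFS_FSB_TO_B(mp, 10);    /* 10 << 12 == 40960 bytes */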
615 | /* | ||
616 | * Allocation group header | ||
617 | * | ||
618 | * This is divided into three structures, placed in sequential 512-byte | ||
619 | * buffers after a copy of the superblock (also in a 512-byte buffer). | ||
620 | */ | ||
621 | #define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ | ||
622 | #define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ | ||
623 | #define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */ | ||
624 | #define XFS_AGF_VERSION 1 | ||
625 | #define XFS_AGI_VERSION 1 | ||
626 | |||
627 | #define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION) | ||
628 | #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) | ||
629 | |||
630 | /* | ||
631 | * Btree number 0 is bno, 1 is cnt. This value gives the size of the | ||
632 | * arrays below. | ||
633 | */ | ||
634 | #define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) | ||
635 | |||
636 | /* | ||
637 | * The second word of agf_levels in the first a.g. overlaps the EFS | ||
638 | * superblock's magic number. Since the magic numbers valid for EFS | ||
639 | * are > 64k, our value cannot be confused for an EFS superblock's. | ||
640 | */ | ||
641 | |||
642 | typedef struct xfs_agf { | ||
643 | /* | ||
644 | * Common allocation group header information | ||
645 | */ | ||
646 | __be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */ | ||
647 | __be32 agf_versionnum; /* header version == XFS_AGF_VERSION */ | ||
648 | __be32 agf_seqno; /* sequence # starting from 0 */ | ||
649 | __be32 agf_length; /* size in blocks of a.g. */ | ||
650 | /* | ||
651 | * Freespace information | ||
652 | */ | ||
653 | __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ | ||
654 | __be32 agf_spare0; /* spare field */ | ||
655 | __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ | ||
656 | __be32 agf_spare1; /* spare field */ | ||
657 | |||
658 | __be32 agf_flfirst; /* first freelist block's index */ | ||
659 | __be32 agf_fllast; /* last freelist block's index */ | ||
660 | __be32 agf_flcount; /* count of blocks in freelist */ | ||
661 | __be32 agf_freeblks; /* total free blocks */ | ||
662 | |||
663 | __be32 agf_longest; /* longest free space */ | ||
664 | __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ | ||
665 | uuid_t agf_uuid; /* uuid of filesystem */ | ||
666 | |||
667 | /* | ||
668 | * reserve some contiguous space for future logged fields before we add | ||
669 | * the unlogged fields. This makes the range logging via flags and | ||
670 | * structure offsets much simpler. | ||
671 | */ | ||
672 | __be64 agf_spare64[16]; | ||
673 | |||
674 | /* unlogged fields, written during buffer writeback. */ | ||
675 | __be64 agf_lsn; /* last write sequence */ | ||
676 | __be32 agf_crc; /* crc of agf sector */ | ||
677 | __be32 agf_spare2; | ||
678 | |||
679 | /* structure must be padded to 64 bit alignment */ | ||
680 | } xfs_agf_t; | ||
681 | |||
682 | #define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) | ||
683 | |||
684 | #define XFS_AGF_MAGICNUM 0x00000001 | ||
685 | #define XFS_AGF_VERSIONNUM 0x00000002 | ||
686 | #define XFS_AGF_SEQNO 0x00000004 | ||
687 | #define XFS_AGF_LENGTH 0x00000008 | ||
688 | #define XFS_AGF_ROOTS 0x00000010 | ||
689 | #define XFS_AGF_LEVELS 0x00000020 | ||
690 | #define XFS_AGF_FLFIRST 0x00000040 | ||
691 | #define XFS_AGF_FLLAST 0x00000080 | ||
692 | #define XFS_AGF_FLCOUNT 0x00000100 | ||
693 | #define XFS_AGF_FREEBLKS 0x00000200 | ||
694 | #define XFS_AGF_LONGEST 0x00000400 | ||
695 | #define XFS_AGF_BTREEBLKS 0x00000800 | ||
696 | #define XFS_AGF_UUID 0x00001000 | ||
697 | #define XFS_AGF_NUM_BITS 13 | ||
698 | #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) | ||
699 | |||
700 | #define XFS_AGF_FLAGS \ | ||
701 | { XFS_AGF_MAGICNUM, "MAGICNUM" }, \ | ||
702 | { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \ | ||
703 | { XFS_AGF_SEQNO, "SEQNO" }, \ | ||
704 | { XFS_AGF_LENGTH, "LENGTH" }, \ | ||
705 | { XFS_AGF_ROOTS, "ROOTS" }, \ | ||
706 | { XFS_AGF_LEVELS, "LEVELS" }, \ | ||
707 | { XFS_AGF_FLFIRST, "FLFIRST" }, \ | ||
708 | { XFS_AGF_FLLAST, "FLLAST" }, \ | ||
709 | { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ | ||
710 | { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ | ||
711 | { XFS_AGF_LONGEST, "LONGEST" }, \ | ||
712 | { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ | ||
713 | { XFS_AGF_UUID, "UUID" } | ||
714 | |||
715 | /* disk block (xfs_daddr_t) in the AG */ | ||
716 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | ||
717 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) | ||
718 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) | ||
719 | |||
720 | /* | ||
721 | * Size of the unlinked inode hash table in the agi. | ||
722 | */ | ||
723 | #define XFS_AGI_UNLINKED_BUCKETS 64 | ||
724 | |||
725 | typedef struct xfs_agi { | ||
726 | /* | ||
727 | * Common allocation group header information | ||
728 | */ | ||
729 | __be32 agi_magicnum; /* magic number == XFS_AGI_MAGIC */ | ||
730 | __be32 agi_versionnum; /* header version == XFS_AGI_VERSION */ | ||
731 | __be32 agi_seqno; /* sequence # starting from 0 */ | ||
732 | __be32 agi_length; /* size in blocks of a.g. */ | ||
733 | /* | ||
734 | * Inode information | ||
735 | * Inodes are mapped by interpreting the inode number, so no | ||
736 | * mapping data is needed here. | ||
737 | */ | ||
738 | __be32 agi_count; /* count of allocated inodes */ | ||
739 | __be32 agi_root; /* root of inode btree */ | ||
740 | __be32 agi_level; /* levels in inode btree */ | ||
741 | __be32 agi_freecount; /* number of free inodes */ | ||
742 | |||
743 | __be32 agi_newino; /* new inode just allocated */ | ||
744 | __be32 agi_dirino; /* last directory inode chunk */ | ||
745 | /* | ||
746 | * Hash table of inodes which have been unlinked but are | ||
747 | * still being referenced. | ||
748 | */ | ||
749 | __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; | ||
750 | /* | ||
751 | * This marks the end of logging region 1 and start of logging region 2. | ||
752 | */ | ||
753 | uuid_t agi_uuid; /* uuid of filesystem */ | ||
754 | __be32 agi_crc; /* crc of agi sector */ | ||
755 | __be32 agi_pad32; | ||
756 | __be64 agi_lsn; /* last write sequence */ | ||
757 | |||
758 | __be32 agi_free_root; /* root of the free inode btree */ | ||
759 | __be32 agi_free_level;/* levels in free inode btree */ | ||
760 | |||
761 | /* structure must be padded to 64 bit alignment */ | ||
762 | } xfs_agi_t; | ||
763 | |||
764 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) | ||
765 | |||
766 | #define XFS_AGI_MAGICNUM (1 << 0) | ||
767 | #define XFS_AGI_VERSIONNUM (1 << 1) | ||
768 | #define XFS_AGI_SEQNO (1 << 2) | ||
769 | #define XFS_AGI_LENGTH (1 << 3) | ||
770 | #define XFS_AGI_COUNT (1 << 4) | ||
771 | #define XFS_AGI_ROOT (1 << 5) | ||
772 | #define XFS_AGI_LEVEL (1 << 6) | ||
773 | #define XFS_AGI_FREECOUNT (1 << 7) | ||
774 | #define XFS_AGI_NEWINO (1 << 8) | ||
775 | #define XFS_AGI_DIRINO (1 << 9) | ||
776 | #define XFS_AGI_UNLINKED (1 << 10) | ||
777 | #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ | ||
778 | #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) | ||
779 | #define XFS_AGI_FREE_ROOT (1 << 11) | ||
780 | #define XFS_AGI_FREE_LEVEL (1 << 12) | ||
781 | #define XFS_AGI_NUM_BITS_R2 13 | ||
782 | |||
783 | /* disk block (xfs_daddr_t) in the AG */ | ||
784 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | ||
785 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) | ||
786 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) | ||
787 | |||
788 | /* | ||
789 | * The third a.g. block contains the a.g. freelist, an array | ||
790 | * of block pointers to blocks owned by the allocation btree code. | ||
791 | */ | ||
792 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) | ||
793 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) | ||
794 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) | ||
795 | |||
796 | #define XFS_BUF_TO_AGFL_BNO(mp, bp) \ | ||
797 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
798 | &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ | ||
799 | (__be32 *)(bp)->b_addr) | ||
800 | |||
801 | /* | ||
802 | * Size of the AGFL. For CRC-enabled filesystems we steal a couple of | ||
803 | * slots in the beginning of the block for a proper header with the | ||
804 | * location information and CRC. | ||
805 | */ | ||
806 | #define XFS_AGFL_SIZE(mp) \ | ||
807 | (((mp)->m_sb.sb_sectsize - \ | ||
808 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
809 | sizeof(struct xfs_agfl) : 0)) / \ | ||
810 | sizeof(xfs_agblock_t)) | ||
811 | |||
812 | typedef struct xfs_agfl { | ||
813 | __be32 agfl_magicnum; | ||
814 | __be32 agfl_seqno; | ||
815 | uuid_t agfl_uuid; | ||
816 | __be64 agfl_lsn; | ||
817 | __be32 agfl_crc; | ||
818 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ | ||
819 | } xfs_agfl_t; | ||
820 | |||
821 | #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) | ||
822 | |||
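Editor's worked example (assumed values): with 512-byte sectors and 4-byte xfs_agblock_t entries, a non-CRC (v4) filesystem gets XFS_AGFL_SIZE = 512 / 4 = 128 free-list slots per AGFL block; a CRC (v5) filesystem first subtracts sizeof(struct xfs_agfl) for the header above - note that sizeof() here includes any compiler padding of the structure - and stores correspondingly fewer slots.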
823 | |||
824 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) | ||
825 | #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ | ||
826 | (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) | ||
827 | #define XFS_MIN_FREELIST(a,mp) \ | ||
828 | (XFS_MIN_FREELIST_RAW( \ | ||
829 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \ | ||
830 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) | ||
831 | #define XFS_MIN_FREELIST_PAG(pag,mp) \ | ||
832 | (XFS_MIN_FREELIST_RAW( \ | ||
833 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ | ||
834 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) | ||
835 | |||
836 | #define XFS_AGB_TO_FSB(mp,agno,agbno) \ | ||
837 | (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) | ||
838 | #define XFS_FSB_TO_AGNO(mp,fsbno) \ | ||
839 | ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) | ||
840 | #define XFS_FSB_TO_AGBNO(mp,fsbno) \ | ||
841 | ((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog))) | ||
842 | #define XFS_AGB_TO_DADDR(mp,agno,agbno) \ | ||
843 | ((xfs_daddr_t)XFS_FSB_TO_BB(mp, \ | ||
844 | (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))) | ||
845 | #define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d)) | ||
846 | |||
847 | /* | ||
848 | * For checking for bad ranges of xfs_daddr_t's, covering multiple | ||
849 | * allocation groups or a single xfs_daddr_t that's a superblock copy. | ||
850 | */ | ||
851 | #define XFS_AG_CHECK_DADDR(mp,d,len) \ | ||
852 | ((len) == 1 ? \ | ||
853 | ASSERT((d) == XFS_SB_DADDR || \ | ||
854 | xfs_daddr_to_agbno(mp, d) != XFS_SB_DADDR) : \ | ||
855 | ASSERT(xfs_daddr_to_agno(mp, d) == \ | ||
856 | xfs_daddr_to_agno(mp, (d) + (len) - 1))) | ||
857 | |||
858 | typedef struct xfs_timestamp { | ||
859 | __be32 t_sec; /* timestamp seconds */ | ||
860 | __be32 t_nsec; /* timestamp nanoseconds */ | ||
861 | } xfs_timestamp_t; | ||
862 | |||
863 | /* | ||
864 | * On-disk inode structure. | ||
865 | * | ||
866 | * This is just the header or "dinode core"; the inode is expanded to fill a | ||
867 | * variable size, with the leftover area split into a data and an attribute fork. | ||
868 | * The format of the data and attribute fork depends on the format of the | ||
869 | * inode as indicated by di_format and di_aformat. To access the data and | ||
870 | * attribute forks, use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR | ||
871 | * macros below. | ||
872 | * | ||
873 | * There is a very similar struct icdinode in xfs_inode which matches the | ||
874 | * layout of the first 96 bytes of this structure, but is kept in native | ||
875 | * format instead of big endian. | ||
876 | * | ||
877 | * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed | ||
878 | * padding field for v3 inodes. | ||
879 | */ | ||
880 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ | ||
881 | #define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) | ||
882 | typedef struct xfs_dinode { | ||
883 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | ||
884 | __be16 di_mode; /* mode and type of file */ | ||
885 | __u8 di_version; /* inode version */ | ||
886 | __u8 di_format; /* format of di_c data */ | ||
887 | __be16 di_onlink; /* old number of links to file */ | ||
888 | __be32 di_uid; /* owner's user id */ | ||
889 | __be32 di_gid; /* owner's group id */ | ||
890 | __be32 di_nlink; /* number of links to file */ | ||
891 | __be16 di_projid_lo; /* lower part of owner's project id */ | ||
892 | __be16 di_projid_hi; /* higher part owner's project id */ | ||
893 | __u8 di_pad[6]; /* unused, zeroed space */ | ||
894 | __be16 di_flushiter; /* incremented on flush */ | ||
895 | xfs_timestamp_t di_atime; /* time last accessed */ | ||
896 | xfs_timestamp_t di_mtime; /* time last modified */ | ||
897 | xfs_timestamp_t di_ctime; /* time created/inode modified */ | ||
898 | __be64 di_size; /* number of bytes in file */ | ||
899 | __be64 di_nblocks; /* # of direct & btree blocks used */ | ||
900 | __be32 di_extsize; /* basic/minimum extent size for file */ | ||
901 | __be32 di_nextents; /* number of extents in data fork */ | ||
902 | __be16 di_anextents; /* number of extents in attribute fork*/ | ||
903 | __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ | ||
904 | __s8 di_aformat; /* format of attr fork's data */ | ||
905 | __be32 di_dmevmask; /* DMIG event mask */ | ||
906 | __be16 di_dmstate; /* DMIG state info */ | ||
907 | __be16 di_flags; /* random flags, XFS_DIFLAG_... */ | ||
908 | __be32 di_gen; /* generation number */ | ||
909 | |||
910 | /* di_next_unlinked is the only non-core field in the old dinode */ | ||
911 | __be32 di_next_unlinked;/* agi unlinked list ptr */ | ||
912 | |||
913 | /* start of the extended dinode, writable fields */ | ||
914 | __le32 di_crc; /* CRC of the inode */ | ||
915 | __be64 di_changecount; /* number of attribute changes */ | ||
916 | __be64 di_lsn; /* flush sequence */ | ||
917 | __be64 di_flags2; /* more random flags */ | ||
918 | __u8 di_pad2[16]; /* more padding for future expansion */ | ||
919 | |||
920 | /* fields only written to during inode creation */ | ||
921 | xfs_timestamp_t di_crtime; /* time created */ | ||
922 | __be64 di_ino; /* inode number */ | ||
923 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
924 | |||
925 | /* structure must be padded to 64 bit alignment */ | ||
926 | } xfs_dinode_t; | ||
927 | |||
928 | #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) | ||
929 | |||
930 | #define DI_MAX_FLUSH 0xffff | ||
931 | |||
932 | /* | ||
933 | * Size of the core inode on disk. Version 1 and 2 inodes have | ||
934 | * the same size, but version 3 has grown a few additional fields. | ||
935 | */ | ||
936 | static inline uint xfs_dinode_size(int version) | ||
937 | { | ||
938 | if (version == 3) | ||
939 | return sizeof(struct xfs_dinode); | ||
940 | return offsetof(struct xfs_dinode, di_crc); | ||
941 | } | ||
942 | |||
943 | /* | ||
944 | * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. | ||
945 | * Since the pathconf interface is signed, we use 2^31 - 1 instead. | ||
946 | * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. | ||
947 | */ | ||
948 | #define XFS_MAXLINK ((1U << 31) - 1U) | ||
949 | #define XFS_MAXLINK_1 65535U | ||
950 | |||
951 | /* | ||
952 | * Values for di_format | ||
953 | */ | ||
954 | typedef enum xfs_dinode_fmt { | ||
955 | XFS_DINODE_FMT_DEV, /* xfs_dev_t */ | ||
956 | XFS_DINODE_FMT_LOCAL, /* bulk data */ | ||
957 | XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ | ||
958 | XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ | ||
959 | XFS_DINODE_FMT_UUID /* uuid_t */ | ||
960 | } xfs_dinode_fmt_t; | ||
961 | |||
962 | /* | ||
963 | * Inode minimum and maximum sizes. | ||
964 | */ | ||
965 | #define XFS_DINODE_MIN_LOG 8 | ||
966 | #define XFS_DINODE_MAX_LOG 11 | ||
967 | #define XFS_DINODE_MIN_SIZE (1 << XFS_DINODE_MIN_LOG) | ||
968 | #define XFS_DINODE_MAX_SIZE (1 << XFS_DINODE_MAX_LOG) | ||
969 | |||
970 | /* | ||
971 | * Inode size for given fs. | ||
972 | */ | ||
973 | #define XFS_LITINO(mp, version) \ | ||
974 | ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) | ||
975 | |||
976 | /* | ||
977 | * Inode data & attribute fork sizes, per inode. | ||
978 | */ | ||
979 | #define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0) | ||
980 | #define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) | ||
981 | |||
982 | #define XFS_DFORK_DSIZE(dip,mp) \ | ||
983 | (XFS_DFORK_Q(dip) ? \ | ||
984 | XFS_DFORK_BOFF(dip) : \ | ||
985 | XFS_LITINO(mp, (dip)->di_version)) | ||
986 | #define XFS_DFORK_ASIZE(dip,mp) \ | ||
987 | (XFS_DFORK_Q(dip) ? \ | ||
988 | XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \ | ||
989 | 0) | ||
990 | #define XFS_DFORK_SIZE(dip,mp,w) \ | ||
991 | ((w) == XFS_DATA_FORK ? \ | ||
992 | XFS_DFORK_DSIZE(dip, mp) : \ | ||
993 | XFS_DFORK_ASIZE(dip, mp)) | ||
994 | |||
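Editor's worked example (assumed geometry): a v3 dinode header is sizeof(struct xfs_dinode) = 176 bytes, so a 512-byte inode leaves XFS_LITINO = 512 - 176 = 336 bytes of literal area. If di_forkoff is 15, the data fork gets XFS_DFORK_BOFF = 15 << 3 = 120 bytes and the attribute fork the remaining 336 - 120 = 216 bytes; with di_forkoff == 0 (no attribute fork) the data fork gets all 336 bytes.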
995 | /* | ||
996 | * Return pointers to the data or attribute forks. | ||
997 | */ | ||
998 | #define XFS_DFORK_DPTR(dip) \ | ||
999 | ((char *)dip + xfs_dinode_size(dip->di_version)) | ||
1000 | #define XFS_DFORK_APTR(dip) \ | ||
1001 | (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) | ||
1002 | #define XFS_DFORK_PTR(dip,w) \ | ||
1003 | ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) | ||
1004 | |||
1005 | #define XFS_DFORK_FORMAT(dip,w) \ | ||
1006 | ((w) == XFS_DATA_FORK ? \ | ||
1007 | (dip)->di_format : \ | ||
1008 | (dip)->di_aformat) | ||
1009 | #define XFS_DFORK_NEXTENTS(dip,w) \ | ||
1010 | ((w) == XFS_DATA_FORK ? \ | ||
1011 | be32_to_cpu((dip)->di_nextents) : \ | ||
1012 | be16_to_cpu((dip)->di_anextents)) | ||
1013 | |||
1014 | /* | ||
1015 | * For block and character special files the 32bit dev_t is stored at the | ||
1016 | * beginning of the data fork. | ||
1017 | */ | ||
1018 | static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip) | ||
1019 | { | ||
1020 | return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip)); | ||
1021 | } | ||
1022 | |||
1023 | static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) | ||
1024 | { | ||
1025 | *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev); | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * Values for di_flags | ||
1030 | * There should be a one-to-one correspondence between these flags and the | ||
1031 | * XFS_XFLAG_s. | ||
1032 | */ | ||
1033 | #define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */ | ||
1034 | #define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */ | ||
1035 | #define XFS_DIFLAG_NEWRTBM_BIT 2 /* for rtbitmap inode, new format */ | ||
1036 | #define XFS_DIFLAG_IMMUTABLE_BIT 3 /* inode is immutable */ | ||
1037 | #define XFS_DIFLAG_APPEND_BIT 4 /* inode is append-only */ | ||
1038 | #define XFS_DIFLAG_SYNC_BIT 5 /* inode is written synchronously */ | ||
1039 | #define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */ | ||
1040 | #define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */ | ||
1041 | #define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ | ||
1042 | #define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ | ||
1043 | #define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ | ||
1044 | #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ | ||
1045 | #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ | ||
1046 | #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ | ||
1047 | #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ | ||
1048 | #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) | ||
1049 | #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) | ||
1050 | #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) | ||
1051 | #define XFS_DIFLAG_IMMUTABLE (1 << XFS_DIFLAG_IMMUTABLE_BIT) | ||
1052 | #define XFS_DIFLAG_APPEND (1 << XFS_DIFLAG_APPEND_BIT) | ||
1053 | #define XFS_DIFLAG_SYNC (1 << XFS_DIFLAG_SYNC_BIT) | ||
1054 | #define XFS_DIFLAG_NOATIME (1 << XFS_DIFLAG_NOATIME_BIT) | ||
1055 | #define XFS_DIFLAG_NODUMP (1 << XFS_DIFLAG_NODUMP_BIT) | ||
1056 | #define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) | ||
1057 | #define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) | ||
1058 | #define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) | ||
1059 | #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) | ||
1060 | #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) | ||
1061 | #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) | ||
1062 | #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) | ||
1063 | |||
1064 | #define XFS_DIFLAG_ANY \ | ||
1065 | (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ | ||
1066 | XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ | ||
1067 | XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ | ||
1068 | XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ | ||
1069 | XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM) | ||
1070 | |||
1071 | /* | ||
1072 | * Inode number format: | ||
1073 | * low inopblog bits - offset in block | ||
1074 | * next agblklog bits - block number in ag | ||
1075 | * next agno_log bits - ag number | ||
1076 | * high agno_log-agblklog-inopblog bits - 0 | ||
1077 | */ | ||
1078 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) | ||
1079 | #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog | ||
1080 | #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog | ||
1081 | #define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log | ||
1082 | #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log | ||
1083 | #define XFS_INO_BITS(mp) \ | ||
1084 | XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp) | ||
1085 | #define XFS_INO_TO_AGNO(mp,i) \ | ||
1086 | ((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp))) | ||
1087 | #define XFS_INO_TO_AGINO(mp,i) \ | ||
1088 | ((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp))) | ||
1089 | #define XFS_INO_TO_AGBNO(mp,i) \ | ||
1090 | (((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \ | ||
1091 | XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp))) | ||
1092 | #define XFS_INO_TO_OFFSET(mp,i) \ | ||
1093 | ((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) | ||
1094 | #define XFS_INO_TO_FSB(mp,i) \ | ||
1095 | XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i)) | ||
1096 | #define XFS_AGINO_TO_INO(mp,a,i) \ | ||
1097 | (((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i)) | ||
1098 | #define XFS_AGINO_TO_AGBNO(mp,i) ((i) >> XFS_INO_OFFSET_BITS(mp)) | ||
1099 | #define XFS_AGINO_TO_OFFSET(mp,i) \ | ||
1100 | ((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) | ||
1101 | #define XFS_OFFBNO_TO_AGINO(mp,b,o) \ | ||
1102 | ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) | ||
1103 | |||
1104 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) | ||
1105 | #define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) | ||
1106 | |||
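Editor's sketch of the decomposition the macros above perform, under an assumed geometry of 32 inodes per block (sb_inopblog = 5) and 2^20 blocks per AG (sb_agblklog = 20), giving m_agino_log = 25:

    xfs_agnumber_t agno   = XFS_INO_TO_AGNO(mp, ino);   /* ino >> 25 */
    xfs_agino_t    agino  = XFS_INO_TO_AGINO(mp, ino);  /* ino & ((1 << 25) - 1) */
    xfs_agblock_t  agbno  = XFS_INO_TO_AGBNO(mp, ino);  /* (ino >> 5) & ((1 << 20) - 1) */
    int            offset = XFS_INO_TO_OFFSET(mp, ino); /* ino & 31 */

Recombining with XFS_AGINO_TO_INO(mp, agno, agino) yields the original number.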
1107 | /* | ||
37 | * RealTime Device format definitions | 1108 | * RealTime Device format definitions |
38 | */ | 1109 | */ |
39 | 1110 | ||
@@ -413,4 +1484,40 @@ struct xfs_btree_block { | |||
413 | #define XFS_BTREE_LBLOCK_CRC_OFF \ | 1484 | #define XFS_BTREE_LBLOCK_CRC_OFF \ |
414 | offsetof(struct xfs_btree_block, bb_u.l.bb_crc) | 1485 | offsetof(struct xfs_btree_block, bb_u.l.bb_crc) |
415 | 1486 | ||
1487 | /* | ||
1488 | * On-disk XFS access control list structure. | ||
1489 | */ | ||
1490 | struct xfs_acl_entry { | ||
1491 | __be32 ae_tag; | ||
1492 | __be32 ae_id; | ||
1493 | __be16 ae_perm; | ||
1494 | __be16 ae_pad; /* fill the implicit hole in the structure */ | ||
1495 | }; | ||
1496 | |||
1497 | struct xfs_acl { | ||
1498 | __be32 acl_cnt; | ||
1499 | struct xfs_acl_entry acl_entry[0]; | ||
1500 | }; | ||
1501 | |||
1502 | /* | ||
1503 | * The number of ACL entries allowed is defined by the on-disk format. | ||
1504 | * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is | ||
1505 | * limited only by the maximum size of the xattr that stores the information. | ||
1506 | */ | ||
1507 | #define XFS_ACL_MAX_ENTRIES(mp) \ | ||
1508 | (xfs_sb_version_hascrc(&mp->m_sb) \ | ||
1509 | ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ | ||
1510 | sizeof(struct xfs_acl_entry) \ | ||
1511 | : 25) | ||
1512 | |||
1513 | #define XFS_ACL_MAX_SIZE(mp) \ | ||
1514 | (sizeof(struct xfs_acl) + \ | ||
1515 | sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) | ||
1516 | |||
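Editor's worked example: each struct xfs_acl_entry above is 4 + 4 + 2 + 2 = 12 bytes and the struct xfs_acl header is 4 bytes, so on a v5 (CRC) filesystem with XATTR_SIZE_MAX of 65536 the limit works out to (65536 - 4) / 12 = 5461 entries, against the fixed 25 entries on a v4 filesystem.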
1517 | /* On-disk XFS extended attribute names */ | ||
1518 | #define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" | ||
1519 | #define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" | ||
1520 | #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) | ||
1521 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) | ||
1522 | |||
416 | #endif /* __XFS_FORMAT_H__ */ | 1523 | #endif /* __XFS_FORMAT_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 23dcb72fc5e6..116ef1ddb3e3 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -22,9 +22,7 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
30 | #include "xfs_btree.h" | 28 | #include "xfs_btree.h" |
@@ -39,7 +37,6 @@ | |||
39 | #include "xfs_buf_item.h" | 37 | #include "xfs_buf_item.h" |
40 | #include "xfs_icreate_item.h" | 38 | #include "xfs_icreate_item.h" |
41 | #include "xfs_icache.h" | 39 | #include "xfs_icache.h" |
42 | #include "xfs_dinode.h" | ||
43 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
44 | 41 | ||
45 | 42 | ||
@@ -48,12 +45,12 @@ | |||
48 | */ | 45 | */ |
49 | static inline int | 46 | static inline int |
50 | xfs_ialloc_cluster_alignment( | 47 | xfs_ialloc_cluster_alignment( |
51 | xfs_alloc_arg_t *args) | 48 | struct xfs_mount *mp) |
52 | { | 49 | { |
53 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | 50 | if (xfs_sb_version_hasalign(&mp->m_sb) && |
54 | args->mp->m_sb.sb_inoalignmt >= | 51 | mp->m_sb.sb_inoalignmt >= |
55 | XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) | 52 | XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) |
56 | return args->mp->m_sb.sb_inoalignmt; | 53 | return mp->m_sb.sb_inoalignmt; |
57 | return 1; | 54 | return 1; |
58 | } | 55 | } |
59 | 56 | ||
@@ -412,7 +409,7 @@ xfs_ialloc_ag_alloc( | |||
412 | * but not to use them in the actual exact allocation. | 409 | * but not to use them in the actual exact allocation. |
413 | */ | 410 | */ |
414 | args.alignment = 1; | 411 | args.alignment = 1; |
415 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | 412 | args.minalignslop = xfs_ialloc_cluster_alignment(args.mp) - 1; |
416 | 413 | ||
417 | /* Allow space for the inode btree to split. */ | 414 | /* Allow space for the inode btree to split. */ |
418 | args.minleft = args.mp->m_in_maxlevels - 1; | 415 | args.minleft = args.mp->m_in_maxlevels - 1; |
@@ -448,7 +445,7 @@ xfs_ialloc_ag_alloc( | |||
448 | args.alignment = args.mp->m_dalign; | 445 | args.alignment = args.mp->m_dalign; |
449 | isaligned = 1; | 446 | isaligned = 1; |
450 | } else | 447 | } else |
451 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 448 | args.alignment = xfs_ialloc_cluster_alignment(args.mp); |
452 | /* | 449 | /* |
453 | * Need to figure out where to allocate the inode blocks. | 450 | * Need to figure out where to allocate the inode blocks. |
454 | * Ideally they should be spaced out through the a.g. | 451 | * Ideally they should be spaced out through the a.g. |
@@ -477,7 +474,7 @@ xfs_ialloc_ag_alloc( | |||
477 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 474 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
478 | args.agbno = be32_to_cpu(agi->agi_root); | 475 | args.agbno = be32_to_cpu(agi->agi_root); |
479 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); | 476 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
480 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 477 | args.alignment = xfs_ialloc_cluster_alignment(args.mp); |
481 | if ((error = xfs_alloc_vextent(&args))) | 478 | if ((error = xfs_alloc_vextent(&args))) |
482 | return error; | 479 | return error; |
483 | } | 480 | } |
@@ -632,10 +629,24 @@ xfs_ialloc_ag_select( | |||
632 | } | 629 | } |
633 | 630 | ||
634 | /* | 631 | /* |
635 | * Is there enough free space for the file plus a block of | 632 | * Check that there is enough free space for the file plus a |
636 | * inodes? (if we need to allocate some)? | 633 | * chunk of inodes if we need to allocate some. If this is the |
634 | * first pass across the AGs, take into account the potential | ||
635 | * space needed for alignment of inode chunks when checking the | ||
636 | * longest contiguous free space in the AG - this prevents us | ||
637 | * from returning ENOSPC when we have free space larger than | ||
638 | * m_ialloc_blks but alignment constraints prevent us from using | ||
639 | * it. | ||
640 | * | ||
641 | * If we can't find an AG with space for full alignment slack to | ||
642 | * be taken into account, we must be near ENOSPC in all AGs. | ||
643 | * Hence we don't include alignment for the second pass and so | ||
644 | * if we fail allocation due to alignment issues then it is most | ||
645 | * likely a real ENOSPC condition. | ||
637 | */ | 646 | */ |
638 | ineed = mp->m_ialloc_blks; | 647 | ineed = mp->m_ialloc_blks; |
648 | if (flags && ineed > 1) | ||
649 | ineed += xfs_ialloc_cluster_alignment(mp); | ||
639 | longest = pag->pagf_longest; | 650 | longest = pag->pagf_longest; |
640 | if (!longest) | 651 | if (!longest) |
641 | longest = pag->pagf_flcount > 0; | 652 | longest = pag->pagf_flcount > 0; |
@@ -1137,11 +1148,7 @@ xfs_dialloc_ag_update_inobt( | |||
1137 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | 1148 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && |
1138 | (rec.ir_freecount == frec->ir_freecount)); | 1149 | (rec.ir_freecount == frec->ir_freecount)); |
1139 | 1150 | ||
1140 | error = xfs_inobt_update(cur, &rec); | 1151 | return xfs_inobt_update(cur, &rec); |
1141 | if (error) | ||
1142 | return error; | ||
1143 | |||
1144 | return 0; | ||
1145 | } | 1152 | } |
1146 | 1153 | ||
1147 | /* | 1154 | /* |
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 95ad1c002d60..100007d56449 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h | |||
@@ -160,4 +160,8 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, | |||
160 | xfs_agnumber_t agno, xfs_agblock_t agbno, | 160 | xfs_agnumber_t agno, xfs_agblock_t agbno, |
161 | xfs_agblock_t length, unsigned int gen); | 161 | xfs_agblock_t length, unsigned int gen); |
162 | 162 | ||
163 | int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | ||
164 | xfs_agnumber_t agno, struct xfs_buf **bpp); | ||
165 | |||
166 | |||
163 | #endif /* __XFS_IALLOC_H__ */ | 167 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index c9b06f30fe86..964c465ca69c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_btree.h" | 27 | #include "xfs_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index f18fd2da49f7..002b6b3a1988 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_error.h" | 26 | #include "xfs_error.h" |
@@ -30,7 +28,6 @@ | |||
30 | #include "xfs_icache.h" | 28 | #include "xfs_icache.h" |
31 | #include "xfs_trans.h" | 29 | #include "xfs_trans.h" |
32 | #include "xfs_ialloc.h" | 30 | #include "xfs_ialloc.h" |
33 | #include "xfs_dinode.h" | ||
34 | 31 | ||
35 | /* | 32 | /* |
36 | * Check that none of the inodes in the buffer have a next | 33 | * Check that none of the inodes in the buffer have a next |
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 6a00f7fed69d..0defbd02f62d 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c | |||
@@ -22,9 +22,6 @@ | |||
22 | #include "xfs_format.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
29 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
30 | #include "xfs_trans.h" | 27 | #include "xfs_trans.h" |
@@ -34,7 +31,6 @@ | |||
34 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
35 | #include "xfs_trace.h" | 32 | #include "xfs_trace.h" |
36 | #include "xfs_attr_sf.h" | 33 | #include "xfs_attr_sf.h" |
37 | #include "xfs_dinode.h" | ||
38 | 34 | ||
39 | kmem_zone_t *xfs_ifork_zone; | 35 | kmem_zone_t *xfs_ifork_zone; |
40 | 36 | ||
diff --git a/fs/xfs/libxfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h deleted file mode 100644 index 4ff2278e147a..000000000000 --- a/fs/xfs/libxfs/xfs_inum.h +++ /dev/null | |||
@@ -1,60 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_INUM_H__ | ||
19 | #define __XFS_INUM_H__ | ||
20 | |||
21 | /* | ||
22 | * Inode number format: | ||
23 | * low inopblog bits - offset in block | ||
24 | * next agblklog bits - block number in ag | ||
25 | * next agno_log bits - ag number | ||
26 | * high agno_log-agblklog-inopblog bits - 0 | ||
27 | */ | ||
28 | |||
29 | struct xfs_mount; | ||
30 | |||
31 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) | ||
32 | #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog | ||
33 | #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog | ||
34 | #define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log | ||
35 | #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log | ||
36 | #define XFS_INO_BITS(mp) \ | ||
37 | XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp) | ||
38 | #define XFS_INO_TO_AGNO(mp,i) \ | ||
39 | ((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp))) | ||
40 | #define XFS_INO_TO_AGINO(mp,i) \ | ||
41 | ((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp))) | ||
42 | #define XFS_INO_TO_AGBNO(mp,i) \ | ||
43 | (((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \ | ||
44 | XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp))) | ||
45 | #define XFS_INO_TO_OFFSET(mp,i) \ | ||
46 | ((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) | ||
47 | #define XFS_INO_TO_FSB(mp,i) \ | ||
48 | XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i)) | ||
49 | #define XFS_AGINO_TO_INO(mp,a,i) \ | ||
50 | (((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i)) | ||
51 | #define XFS_AGINO_TO_AGBNO(mp,i) ((i) >> XFS_INO_OFFSET_BITS(mp)) | ||
52 | #define XFS_AGINO_TO_OFFSET(mp,i) \ | ||
53 | ((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) | ||
54 | #define XFS_OFFBNO_TO_AGINO(mp,b,o) \ | ||
55 | ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) | ||
56 | |||
57 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) | ||
58 | #define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) | ||
59 | |||
60 | #endif /* __XFS_INUM_H__ */ | ||
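The macros in the header removed above just slice an inode number into the fields the comment at its top describes: offset within the block, block within the AG, then AG number. A standalone userspace sketch of that layout, using assumed example widths rather than real superblock values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned inopblog = 5;			/* assumed: 32 inodes per block */
	unsigned agblklog = 20;			/* assumed: ~1M blocks per AG */
	unsigned agino_log = inopblog + agblklog;	/* XFS_INO_AGINO_BITS */
	uint64_t ino;

	/* pack AG 3, AG block 0x1234, offset 7, low bits first */
	ino = ((uint64_t)3 << agino_log) | (0x1234ULL << inopblog) | 7;

	/* unpack, mirroring XFS_INO_TO_AGNO / _TO_AGBNO / _TO_OFFSET */
	printf("agno   = %llu\n", (unsigned long long)(ino >> agino_log));
	printf("agbno  = %llu\n", (unsigned long long)
	       ((ino >> inopblog) & ((1ULL << agblklog) - 1)));
	printf("offset = %llu\n", (unsigned long long)
	       (ino & ((1ULL << inopblog) - 1)));
	return 0;
}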
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index aff12f2d4428..265314690415 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h | |||
@@ -361,7 +361,7 @@ typedef struct xfs_ictimestamp { | |||
361 | 361 | ||
362 | /* | 362 | /* |
363 | * NOTE: This structure must be kept identical to struct xfs_dinode | 363 | * NOTE: This structure must be kept identical to struct xfs_dinode |
364 | * in xfs_dinode.h except for the endianness annotations. | 364 | * except for the endianness annotations. |
365 | */ | 365 | */ |
366 | typedef struct xfs_icdinode { | 366 | typedef struct xfs_icdinode { |
367 | __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | 367 | __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ |
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index ee7e0e80246b..c10597973333 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_trans_space.h" | 26 | #include "xfs_trans_space.h" |
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 7c818f1e4484..9b59ffa1fc19 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_bmap.h" | 27 | #include "xfs_bmap.h" |
@@ -36,7 +34,6 @@ | |||
36 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
37 | #include "xfs_buf.h" | 35 | #include "xfs_buf.h" |
38 | #include "xfs_icache.h" | 36 | #include "xfs_icache.h" |
39 | #include "xfs_dinode.h" | ||
40 | #include "xfs_rtalloc.h" | 37 | #include "xfs_rtalloc.h" |
41 | 38 | ||
42 | 39 | ||
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5f902fa7913f..752915fa775a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
29 | #include "xfs_ialloc.h" | 28 | #include "xfs_ialloc.h" |
@@ -33,7 +32,6 @@ | |||
33 | #include "xfs_cksum.h" | 32 | #include "xfs_cksum.h" |
34 | #include "xfs_trans.h" | 33 | #include "xfs_trans.h" |
35 | #include "xfs_buf_item.h" | 34 | #include "xfs_buf_item.h" |
36 | #include "xfs_dinode.h" | ||
37 | #include "xfs_bmap_btree.h" | 35 | #include "xfs_bmap_btree.h" |
38 | #include "xfs_alloc_btree.h" | 36 | #include "xfs_alloc_btree.h" |
39 | #include "xfs_ialloc_btree.h" | 37 | #include "xfs_ialloc_btree.h" |
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 2e739708afd3..8eb1c54bafbf 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h | |||
@@ -19,590 +19,6 @@ | |||
19 | #define __XFS_SB_H__ | 19 | #define __XFS_SB_H__ |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Super block | ||
23 | * Fits into a sector-sized buffer at address 0 of each allocation group. | ||
24 | * Only the first of these is ever updated except during growfs. | ||
25 | */ | ||
26 | |||
27 | struct xfs_buf; | ||
28 | struct xfs_mount; | ||
29 | struct xfs_trans; | ||
30 | |||
31 | #define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */ | ||
32 | #define XFS_SB_VERSION_1 1 /* 5.3, 6.0.1, 6.1 */ | ||
33 | #define XFS_SB_VERSION_2 2 /* 6.2 - attributes */ | ||
34 | #define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */ | ||
35 | #define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */ | ||
36 | #define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */ | ||
37 | #define XFS_SB_VERSION_NUMBITS 0x000f | ||
38 | #define XFS_SB_VERSION_ALLFBITS 0xfff0 | ||
39 | #define XFS_SB_VERSION_ATTRBIT 0x0010 | ||
40 | #define XFS_SB_VERSION_NLINKBIT 0x0020 | ||
41 | #define XFS_SB_VERSION_QUOTABIT 0x0040 | ||
42 | #define XFS_SB_VERSION_ALIGNBIT 0x0080 | ||
43 | #define XFS_SB_VERSION_DALIGNBIT 0x0100 | ||
44 | #define XFS_SB_VERSION_SHAREDBIT 0x0200 | ||
45 | #define XFS_SB_VERSION_LOGV2BIT 0x0400 | ||
46 | #define XFS_SB_VERSION_SECTORBIT 0x0800 | ||
47 | #define XFS_SB_VERSION_EXTFLGBIT 0x1000 | ||
48 | #define XFS_SB_VERSION_DIRV2BIT 0x2000 | ||
49 | #define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */ | ||
50 | #define XFS_SB_VERSION_MOREBITSBIT 0x8000 | ||
51 | |||
52 | /* | ||
53 | * Supported feature bit list is just all bits in the versionnum field because | ||
54 | * we've used them all up and understand them all. Except, of course, for the | ||
55 | * shared superblock bit, which nobody knows what it does and so is unsupported. | ||
56 | */ | ||
57 | #define XFS_SB_VERSION_OKBITS \ | ||
58 | ((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \ | ||
59 | ~XFS_SB_VERSION_SHAREDBIT) | ||
60 | |||
61 | /* | ||
62 | * There are two words to hold XFS "feature" bits: the original | ||
63 | * word, sb_versionnum, and sb_features2. Whenever a bit is set in | ||
64 | * sb_features2, the feature bit XFS_SB_VERSION_MOREBITSBIT must be set. | ||
65 | * | ||
66 | * These defines represent bits in sb_features2. | ||
67 | */ | ||
68 | #define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 | ||
69 | #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ | ||
70 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 | ||
71 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ | ||
72 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ | ||
73 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ | ||
74 | #define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ | ||
75 | #define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */ | ||
76 | |||
77 | #define XFS_SB_VERSION2_OKBITS \ | ||
78 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ | ||
79 | XFS_SB_VERSION2_ATTR2BIT | \ | ||
80 | XFS_SB_VERSION2_PROJID32BIT | \ | ||
81 | XFS_SB_VERSION2_FTYPE) | ||
82 | |||
83 | /* | ||
84 | * Superblock - in core version. Must match the ondisk version below. | ||
85 | * Must be padded to 64 bit alignment. | ||
86 | */ | ||
87 | typedef struct xfs_sb { | ||
88 | __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ | ||
89 | __uint32_t sb_blocksize; /* logical block size, bytes */ | ||
90 | xfs_rfsblock_t sb_dblocks; /* number of data blocks */ | ||
91 | xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ | ||
92 | xfs_rtblock_t sb_rextents; /* number of realtime extents */ | ||
93 | uuid_t sb_uuid; /* file system unique id */ | ||
94 | xfs_fsblock_t sb_logstart; /* starting block of log if internal */ | ||
95 | xfs_ino_t sb_rootino; /* root inode number */ | ||
96 | xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ | ||
97 | xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ | ||
98 | xfs_agblock_t sb_rextsize; /* realtime extent size, blocks */ | ||
99 | xfs_agblock_t sb_agblocks; /* size of an allocation group */ | ||
100 | xfs_agnumber_t sb_agcount; /* number of allocation groups */ | ||
101 | xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */ | ||
102 | xfs_extlen_t sb_logblocks; /* number of log blocks */ | ||
103 | __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */ | ||
104 | __uint16_t sb_sectsize; /* volume sector size, bytes */ | ||
105 | __uint16_t sb_inodesize; /* inode size, bytes */ | ||
106 | __uint16_t sb_inopblock; /* inodes per block */ | ||
107 | char sb_fname[12]; /* file system name */ | ||
108 | __uint8_t sb_blocklog; /* log2 of sb_blocksize */ | ||
109 | __uint8_t sb_sectlog; /* log2 of sb_sectsize */ | ||
110 | __uint8_t sb_inodelog; /* log2 of sb_inodesize */ | ||
111 | __uint8_t sb_inopblog; /* log2 of sb_inopblock */ | ||
112 | __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */ | ||
113 | __uint8_t sb_rextslog; /* log2 of sb_rextents */ | ||
114 | __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */ | ||
115 | __uint8_t sb_imax_pct; /* max % of fs for inode space */ | ||
116 | /* statistics */ | ||
117 | /* | ||
118 | * These fields must remain contiguous. If you really | ||
119 | * want to change their layout, make sure you fix the | ||
120 | * code in xfs_trans_apply_sb_deltas(). | ||
121 | */ | ||
122 | __uint64_t sb_icount; /* allocated inodes */ | ||
123 | __uint64_t sb_ifree; /* free inodes */ | ||
124 | __uint64_t sb_fdblocks; /* free data blocks */ | ||
125 | __uint64_t sb_frextents; /* free realtime extents */ | ||
126 | /* | ||
127 | * End contiguous fields. | ||
128 | */ | ||
129 | xfs_ino_t sb_uquotino; /* user quota inode */ | ||
130 | xfs_ino_t sb_gquotino; /* group quota inode */ | ||
131 | __uint16_t sb_qflags; /* quota flags */ | ||
132 | __uint8_t sb_flags; /* misc. flags */ | ||
133 | __uint8_t sb_shared_vn; /* shared version number */ | ||
134 | xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */ | ||
135 | __uint32_t sb_unit; /* stripe or raid unit */ | ||
136 | __uint32_t sb_width; /* stripe or raid width */ | ||
137 | __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */ | ||
138 | __uint8_t sb_logsectlog; /* log2 of the log sector size */ | ||
139 | __uint16_t sb_logsectsize; /* sector size for the log, bytes */ | ||
140 | __uint32_t sb_logsunit; /* stripe unit size for the log */ | ||
141 | __uint32_t sb_features2; /* additional feature bits */ | ||
142 | |||
143 | /* | ||
144 | * bad features2 field as a result of failing to pad the sb | ||
145 | * structure to 64 bits. Some machines will be using this field | ||
146 | * for features2 bits. Easiest just to mark it bad and not use | ||
147 | * it for anything else. | ||
148 | */ | ||
149 | __uint32_t sb_bad_features2; | ||
150 | |||
151 | /* version 5 superblock fields start here */ | ||
152 | |||
153 | /* feature masks */ | ||
154 | __uint32_t sb_features_compat; | ||
155 | __uint32_t sb_features_ro_compat; | ||
156 | __uint32_t sb_features_incompat; | ||
157 | __uint32_t sb_features_log_incompat; | ||
158 | |||
159 | __uint32_t sb_crc; /* superblock crc */ | ||
160 | __uint32_t sb_pad; | ||
161 | |||
162 | xfs_ino_t sb_pquotino; /* project quota inode */ | ||
163 | xfs_lsn_t sb_lsn; /* last write sequence */ | ||
164 | |||
165 | /* must be padded to 64 bit alignment */ | ||
166 | } xfs_sb_t; | ||
167 | |||
168 | #define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) | ||
169 | |||
170 | /* | ||
171 | * Superblock - on disk version. Must match the in core version above. | ||
172 | * Must be padded to 64 bit alignment. | ||
173 | */ | ||
174 | typedef struct xfs_dsb { | ||
175 | __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ | ||
176 | __be32 sb_blocksize; /* logical block size, bytes */ | ||
177 | __be64 sb_dblocks; /* number of data blocks */ | ||
178 | __be64 sb_rblocks; /* number of realtime blocks */ | ||
179 | __be64 sb_rextents; /* number of realtime extents */ | ||
180 | uuid_t sb_uuid; /* file system unique id */ | ||
181 | __be64 sb_logstart; /* starting block of log if internal */ | ||
182 | __be64 sb_rootino; /* root inode number */ | ||
183 | __be64 sb_rbmino; /* bitmap inode for realtime extents */ | ||
184 | __be64 sb_rsumino; /* summary inode for rt bitmap */ | ||
185 | __be32 sb_rextsize; /* realtime extent size, blocks */ | ||
186 | __be32 sb_agblocks; /* size of an allocation group */ | ||
187 | __be32 sb_agcount; /* number of allocation groups */ | ||
188 | __be32 sb_rbmblocks; /* number of rt bitmap blocks */ | ||
189 | __be32 sb_logblocks; /* number of log blocks */ | ||
190 | __be16 sb_versionnum; /* header version == XFS_SB_VERSION */ | ||
191 | __be16 sb_sectsize; /* volume sector size, bytes */ | ||
192 | __be16 sb_inodesize; /* inode size, bytes */ | ||
193 | __be16 sb_inopblock; /* inodes per block */ | ||
194 | char sb_fname[12]; /* file system name */ | ||
195 | __u8 sb_blocklog; /* log2 of sb_blocksize */ | ||
196 | __u8 sb_sectlog; /* log2 of sb_sectsize */ | ||
197 | __u8 sb_inodelog; /* log2 of sb_inodesize */ | ||
198 | __u8 sb_inopblog; /* log2 of sb_inopblock */ | ||
199 | __u8 sb_agblklog; /* log2 of sb_agblocks (rounded up) */ | ||
200 | __u8 sb_rextslog; /* log2 of sb_rextents */ | ||
201 | __u8 sb_inprogress; /* mkfs is in progress, don't mount */ | ||
202 | __u8 sb_imax_pct; /* max % of fs for inode space */ | ||
203 | /* statistics */ | ||
204 | /* | ||
205 | * These fields must remain contiguous. If you really | ||
206 | * want to change their layout, make sure you fix the | ||
207 | * code in xfs_trans_apply_sb_deltas(). | ||
208 | */ | ||
209 | __be64 sb_icount; /* allocated inodes */ | ||
210 | __be64 sb_ifree; /* free inodes */ | ||
211 | __be64 sb_fdblocks; /* free data blocks */ | ||
212 | __be64 sb_frextents; /* free realtime extents */ | ||
213 | /* | ||
214 | * End contiguous fields. | ||
215 | */ | ||
216 | __be64 sb_uquotino; /* user quota inode */ | ||
217 | __be64 sb_gquotino; /* group quota inode */ | ||
218 | __be16 sb_qflags; /* quota flags */ | ||
219 | __u8 sb_flags; /* misc. flags */ | ||
220 | __u8 sb_shared_vn; /* shared version number */ | ||
221 | __be32 sb_inoalignmt; /* inode chunk alignment, fsblocks */ | ||
222 | __be32 sb_unit; /* stripe or raid unit */ | ||
223 | __be32 sb_width; /* stripe or raid width */ | ||
224 | __u8 sb_dirblklog; /* log2 of dir block size (fsbs) */ | ||
225 | __u8 sb_logsectlog; /* log2 of the log sector size */ | ||
226 | __be16 sb_logsectsize; /* sector size for the log, bytes */ | ||
227 | __be32 sb_logsunit; /* stripe unit size for the log */ | ||
228 | __be32 sb_features2; /* additional feature bits */ | ||
229 | /* | ||
230 | * bad features2 field as a result of failing to pad the sb | ||
231 | * structure to 64 bits. Some machines will be using this field | ||
232 | * for features2 bits. Easiest just to mark it bad and not use | ||
233 | * it for anything else. | ||
234 | */ | ||
235 | __be32 sb_bad_features2; | ||
236 | |||
237 | /* version 5 superblock fields start here */ | ||
238 | |||
239 | /* feature masks */ | ||
240 | __be32 sb_features_compat; | ||
241 | __be32 sb_features_ro_compat; | ||
242 | __be32 sb_features_incompat; | ||
243 | __be32 sb_features_log_incompat; | ||
244 | |||
245 | __le32 sb_crc; /* superblock crc */ | ||
246 | __be32 sb_pad; | ||
247 | |||
248 | __be64 sb_pquotino; /* project quota inode */ | ||
249 | __be64 sb_lsn; /* last write sequence */ | ||
250 | |||
251 | /* must be padded to 64 bit alignment */ | ||
252 | } xfs_dsb_t; | ||
253 | |||
254 | /* | ||
255 | * Sequence number values for the fields. | ||
256 | */ | ||
257 | typedef enum { | ||
258 | XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS, | ||
259 | XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO, | ||
260 | XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS, | ||
261 | XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS, | ||
262 | XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE, | ||
263 | XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG, | ||
264 | XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG, | ||
265 | XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT, | ||
266 | XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO, | ||
267 | XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, | ||
268 | XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, | ||
269 | XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, | ||
270 | XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT, | ||
271 | XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT, | ||
272 | XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD, | ||
273 | XFS_SBS_PQUOTINO, XFS_SBS_LSN, | ||
274 | XFS_SBS_FIELDCOUNT | ||
275 | } xfs_sb_field_t; | ||
276 | |||
277 | /* | ||
278 | * Mask values, defined based on the xfs_sb_field_t values. | ||
279 | * Only define the ones we're using. | ||
280 | */ | ||
281 | #define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x) | ||
282 | #define XFS_SB_UUID XFS_SB_MVAL(UUID) | ||
283 | #define XFS_SB_FNAME XFS_SB_MVAL(FNAME) | ||
284 | #define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO) | ||
285 | #define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO) | ||
286 | #define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO) | ||
287 | #define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM) | ||
288 | #define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO) | ||
289 | #define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO) | ||
290 | #define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS) | ||
291 | #define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) | ||
292 | #define XFS_SB_UNIT XFS_SB_MVAL(UNIT) | ||
293 | #define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) | ||
294 | #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) | ||
295 | #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) | ||
296 | #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) | ||
297 | #define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) | ||
298 | #define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) | ||
299 | #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) | ||
300 | #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) | ||
301 | #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) | ||
302 | #define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT) | ||
303 | #define XFS_SB_CRC XFS_SB_MVAL(CRC) | ||
304 | #define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO) | ||
305 | #define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) | ||
306 | #define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) | ||
307 | #define XFS_SB_MOD_BITS \ | ||
308 | (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ | ||
309 | XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ | ||
310 | XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ | ||
311 | XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ | ||
312 | XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ | ||
313 | XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ | ||
314 | XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) | ||
315 | |||
316 | |||
317 | /* | ||
318 | * Misc. Flags - warning - these will be cleared by xfs_repair unless | ||
319 | * a feature bit is set when the flag is used. | ||
320 | */ | ||
321 | #define XFS_SBF_NOFLAGS 0x00 /* no flags set */ | ||
322 | #define XFS_SBF_READONLY 0x01 /* only read-only mounts allowed */ | ||
323 | |||
324 | /* | ||
325 | * define max. shared version we can interoperate with | ||
326 | */ | ||
327 | #define XFS_SB_MAX_SHARED_VN 0 | ||
328 | |||
329 | #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) | ||
330 | |||
331 | /* | ||
332 | * The first XFS version we support is a v4 superblock with V2 directories. | ||
333 | */ | ||
334 | static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) | ||
335 | { | ||
336 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) | ||
337 | return false; | ||
338 | |||
339 | /* check for unknown features in the fs */ | ||
340 | if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || | ||
341 | ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && | ||
342 | (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) | ||
343 | return false; | ||
344 | |||
345 | return true; | ||
346 | } | ||
347 | |||
348 | static inline bool xfs_sb_good_version(struct xfs_sb *sbp) | ||
349 | { | ||
350 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) | ||
351 | return true; | ||
352 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) | ||
353 | return xfs_sb_good_v4_features(sbp); | ||
354 | return false; | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * Detect a mismatched features2 field. Older kernels read/wrote | ||
359 | * this into the wrong slot, so to be safe we keep them in sync. | ||
360 | */ | ||
361 | static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) | ||
362 | { | ||
363 | return sbp->sb_bad_features2 != sbp->sb_features2; | ||
364 | } | ||
365 | |||
366 | static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) | ||
367 | { | ||
368 | return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); | ||
369 | } | ||
370 | |||
371 | static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) | ||
372 | { | ||
373 | sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; | ||
374 | } | ||
375 | |||
376 | static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) | ||
377 | { | ||
378 | return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); | ||
379 | } | ||
380 | |||
381 | static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) | ||
382 | { | ||
383 | sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; | ||
384 | } | ||
385 | |||
386 | static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) | ||
387 | { | ||
388 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
389 | (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); | ||
390 | } | ||
391 | |||
392 | static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) | ||
393 | { | ||
394 | return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); | ||
395 | } | ||
396 | |||
397 | static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) | ||
398 | { | ||
399 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
400 | (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); | ||
401 | } | ||
402 | |||
403 | static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp) | ||
404 | { | ||
405 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
406 | (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); | ||
407 | } | ||
408 | |||
409 | static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) | ||
410 | { | ||
411 | return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); | ||
412 | } | ||
413 | |||
414 | static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) | ||
415 | { | ||
416 | return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); | ||
417 | } | ||
418 | |||
419 | static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) | ||
420 | { | ||
421 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
422 | (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * sb_features2 bit version macros. | ||
427 | */ | ||
428 | static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) | ||
429 | { | ||
430 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
431 | (xfs_sb_version_hasmorebits(sbp) && | ||
432 | (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); | ||
433 | } | ||
434 | |||
435 | static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) | ||
436 | { | ||
437 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
438 | (xfs_sb_version_hasmorebits(sbp) && | ||
439 | (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); | ||
440 | } | ||
441 | |||
442 | static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) | ||
443 | { | ||
444 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
445 | sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
446 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
447 | } | ||
448 | |||
449 | static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) | ||
450 | { | ||
451 | sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
452 | sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
453 | if (!sbp->sb_features2) | ||
454 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | ||
455 | } | ||
456 | |||
457 | static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) | ||
458 | { | ||
459 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
460 | (xfs_sb_version_hasmorebits(sbp) && | ||
461 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); | ||
462 | } | ||
463 | |||
464 | static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) | ||
465 | { | ||
466 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
467 | sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
468 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Extended v5 superblock feature masks. These are to be used for new v5 | ||
473 | * superblock features only. | ||
474 | * | ||
475 | * Compat features are new features that old kernels will not notice or affect | ||
476 | * and so can mount read-write without issues. | ||
477 | * | ||
478 | * RO-Compat (read only) are features that old kernels can read but will break | ||
479 | * if they write. Hence only read-only mounts of such filesystems are allowed on | ||
480 | * kernels that don't support the feature bit. | ||
481 | * | ||
482 | * InCompat features are features which old kernels will not understand and so | ||
483 | * must not mount. | ||
484 | * | ||
485 | * Log-InCompat features are for changes to log formats or new transactions that | ||
486 | * can't be replayed on older kernels. The fields are set when the filesystem is | ||
487 | * mounted, and a clean unmount clears the fields. | ||
488 | */ | ||
489 | #define XFS_SB_FEAT_COMPAT_ALL 0 | ||
490 | #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL | ||
491 | static inline bool | ||
492 | xfs_sb_has_compat_feature( | ||
493 | struct xfs_sb *sbp, | ||
494 | __uint32_t feature) | ||
495 | { | ||
496 | return (sbp->sb_features_compat & feature) != 0; | ||
497 | } | ||
498 | |||
499 | #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ | ||
500 | #define XFS_SB_FEAT_RO_COMPAT_ALL \ | ||
501 | (XFS_SB_FEAT_RO_COMPAT_FINOBT) | ||
502 | #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL | ||
503 | static inline bool | ||
504 | xfs_sb_has_ro_compat_feature( | ||
505 | struct xfs_sb *sbp, | ||
506 | __uint32_t feature) | ||
507 | { | ||
508 | return (sbp->sb_features_ro_compat & feature) != 0; | ||
509 | } | ||
510 | |||
511 | #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ | ||
512 | #define XFS_SB_FEAT_INCOMPAT_ALL \ | ||
513 | (XFS_SB_FEAT_INCOMPAT_FTYPE) | ||
514 | |||
515 | #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL | ||
516 | static inline bool | ||
517 | xfs_sb_has_incompat_feature( | ||
518 | struct xfs_sb *sbp, | ||
519 | __uint32_t feature) | ||
520 | { | ||
521 | return (sbp->sb_features_incompat & feature) != 0; | ||
522 | } | ||
523 | |||
524 | #define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0 | ||
525 | #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL | ||
526 | static inline bool | ||
527 | xfs_sb_has_incompat_log_feature( | ||
528 | struct xfs_sb *sbp, | ||
529 | __uint32_t feature) | ||
530 | { | ||
531 | return (sbp->sb_features_log_incompat & feature) != 0; | ||
532 | } | ||
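The four feature-mask classes described above translate into a fairly mechanical mount-time policy. A hedged sketch using only the helpers and *_UNKNOWN masks defined above; the wrapper function itself is illustrative, not the kernel's actual mount code.

static int
example_check_v5_features(
	struct xfs_sb		*sbp,
	bool			readonly)
{
	/* unknown compat bits are harmless: warn and continue read-write */
	if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN))
		/* e.g. emit a warning */;

	/* unknown ro-compat bits: safe to read, not safe to write */
	if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN) &&
	    !readonly)
		return -EINVAL;

	/* unknown incompat bits: this kernel cannot use the filesystem */
	if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN))
		return -EINVAL;

	/* unknown log-incompat bits: the log cannot be replayed here */
	if (xfs_sb_has_incompat_log_feature(sbp,
			XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN))
		return -EINVAL;

	return 0;
}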
533 | |||
534 | /* | ||
535 | * V5 superblock specific feature checks | ||
536 | */ | ||
537 | static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) | ||
538 | { | ||
539 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
540 | } | ||
541 | |||
542 | static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) | ||
543 | { | ||
544 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
545 | } | ||
546 | |||
547 | static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) | ||
548 | { | ||
549 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && | ||
550 | xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) || | ||
551 | (xfs_sb_version_hasmorebits(sbp) && | ||
552 | (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); | ||
553 | } | ||
554 | |||
555 | static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) | ||
556 | { | ||
557 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && | ||
558 | (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * end of superblock version macros | ||
563 | */ | ||
564 | |||
565 | static inline bool | ||
566 | xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) | ||
567 | { | ||
568 | return (ino == sbp->sb_uquotino || | ||
569 | ino == sbp->sb_gquotino || | ||
570 | ino == sbp->sb_pquotino); | ||
571 | } | ||
572 | |||
573 | #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ | ||
574 | #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) | ||
575 | #define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) | ||
576 | |||
577 | #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) | ||
578 | #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ | ||
579 | xfs_daddr_to_agno(mp,d), xfs_daddr_to_agbno(mp,d)) | ||
580 | #define XFS_FSB_TO_DADDR(mp,fsbno) XFS_AGB_TO_DADDR(mp, \ | ||
581 | XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno)) | ||
582 | |||
583 | /* | ||
584 | * File system sector to basic block conversions. | ||
585 | */ | ||
586 | #define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log) | ||
587 | |||
588 | /* | ||
589 | * File system block to basic block conversions. | ||
590 | */ | ||
591 | #define XFS_FSB_TO_BB(mp,fsbno) ((fsbno) << (mp)->m_blkbb_log) | ||
592 | #define XFS_BB_TO_FSB(mp,bb) \ | ||
593 | (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) | ||
594 | #define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) | ||
595 | |||
596 | /* | ||
597 | * File system block to byte conversions. | ||
598 | */ | ||
599 | #define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog) | ||
600 | #define XFS_B_TO_FSB(mp,b) \ | ||
601 | ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) | ||
602 | #define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) | ||
603 | #define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) | ||
604 | |||
605 | /* | ||
606 | * perag get/put wrappers for ref counting | 22 | * perag get/put wrappers for ref counting |
607 | */ | 23 | */ |
608 | extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t); | 24 | extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t); |
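The unit-conversion macros near the end of the removed header are all shifts by log2 values cached at mount time. A small standalone illustration with assumed sizes (4096-byte filesystem blocks, 512-byte basic blocks):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned sb_blocklog = 12;		/* assumed 4096-byte fs blocks */
	unsigned bb_shift = 9;			/* 512-byte basic blocks (BBSIZE) */
	unsigned blkbb_log = sb_blocklog - bb_shift;	/* cached as m_blkbb_log */
	uint64_t fsbno = 1000, bb = 8001;

	/* XFS_FSB_TO_BB and XFS_FSB_TO_B are plain shifts */
	printf("FSB %llu = %llu BBs = %llu bytes\n",
	       (unsigned long long)fsbno,
	       (unsigned long long)(fsbno << blkbb_log),
	       (unsigned long long)(fsbno << sb_blocklog));

	/* XFS_BB_TO_FSB rounds up to whole filesystem blocks */
	printf("%llu BBs = %llu FSBs (rounded up)\n",
	       (unsigned long long)bb,
	       (unsigned long long)((bb + (1ULL << blkbb_log) - 1) >> blkbb_log));
	return 0;
}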
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 5782f037eab4..c80c5236c3da 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_shared.h" | 23 | #include "xfs_shared.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
29 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index f2bda7c76b8a..6c1330f29050 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_format.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index a65fa5dde6e9..4b641676f258 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -19,8 +19,6 @@ | |||
19 | #include "xfs_format.h" | 19 | #include "xfs_format.h" |
20 | #include "xfs_log_format.h" | 20 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 21 | #include "xfs_trans_resv.h" |
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
25 | #include "xfs_inode.h" | 23 | #include "xfs_inode.h" |
26 | #include "xfs_acl.h" | 24 | #include "xfs_acl.h" |
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 5dc163744511..3841b07f27bf 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -22,42 +22,6 @@ struct inode; | |||
22 | struct posix_acl; | 22 | struct posix_acl; |
23 | struct xfs_inode; | 23 | struct xfs_inode; |
24 | 24 | ||
25 | #define XFS_ACL_NOT_PRESENT (-1) | ||
26 | |||
27 | /* On-disk XFS access control list structure */ | ||
28 | struct xfs_acl_entry { | ||
29 | __be32 ae_tag; | ||
30 | __be32 ae_id; | ||
31 | __be16 ae_perm; | ||
32 | __be16 ae_pad; /* fill the implicit hole in the structure */ | ||
33 | }; | ||
34 | |||
35 | struct xfs_acl { | ||
36 | __be32 acl_cnt; | ||
37 | struct xfs_acl_entry acl_entry[0]; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * The number of ACL entries allowed is defined by the on-disk format. | ||
42 | * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is | ||
43 | * limited only by the maximum size of the xattr that stores the information. | ||
44 | */ | ||
45 | #define XFS_ACL_MAX_ENTRIES(mp) \ | ||
46 | (xfs_sb_version_hascrc(&mp->m_sb) \ | ||
47 | ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ | ||
48 | sizeof(struct xfs_acl_entry) \ | ||
49 | : 25) | ||
50 | |||
51 | #define XFS_ACL_MAX_SIZE(mp) \ | ||
52 | (sizeof(struct xfs_acl) + \ | ||
53 | sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) | ||
54 | |||
55 | /* On-disk XFS extended attribute names */ | ||
56 | #define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" | ||
57 | #define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" | ||
58 | #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) | ||
59 | #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) | ||
60 | |||
61 | #ifdef CONFIG_XFS_POSIX_ACL | 25 | #ifdef CONFIG_XFS_POSIX_ACL |
62 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); | 26 | extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); |
63 | extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); | 27 | extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); |
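The entry limits spelled out in the comment removed above work out roughly as follows. A standalone illustration with mimicked struct sizes (the real xfs_acl_entry is 12 bytes) and Linux's XATTR_SIZE_MAX of 64k; the numbers are indicative only.

#include <stdio.h>

/* mimic the on-disk sizes: 4-byte ACL header, 12-byte entries */
struct demo_acl_entry { unsigned int tag, id; unsigned short perm, pad; };
struct demo_acl       { unsigned int cnt; };

int main(void)
{
	const unsigned long xattr_size_max = 65536;	/* XATTR_SIZE_MAX */
	unsigned long v5_max = (xattr_size_max - sizeof(struct demo_acl)) /
			       sizeof(struct demo_acl_entry);

	printf("v4 superblock: 25 entries max\n");
	printf("v5 superblock: %lu entries max\n", v5_max);	/* ~5461 */
	return 0;
}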
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f5b2453a43b2..18e2f3bbae5e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_inode.h" | 24 | #include "xfs_inode.h" |
27 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
@@ -33,7 +31,6 @@ | |||
33 | #include "xfs_bmap.h" | 31 | #include "xfs_bmap.h" |
34 | #include "xfs_bmap_util.h" | 32 | #include "xfs_bmap_util.h" |
35 | #include "xfs_bmap_btree.h" | 33 | #include "xfs_bmap_btree.h" |
36 | #include "xfs_dinode.h" | ||
37 | #include <linux/aio.h> | 34 | #include <linux/aio.h> |
38 | #include <linux/gfp.h> | 35 | #include <linux/gfp.h> |
39 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index aa2a8b1838a2..83af4c149635 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 28 | #include "xfs_da_btree.h" |
@@ -39,7 +37,6 @@ | |||
39 | #include "xfs_error.h" | 37 | #include "xfs_error.h" |
40 | #include "xfs_quota.h" | 38 | #include "xfs_quota.h" |
41 | #include "xfs_trace.h" | 39 | #include "xfs_trace.h" |
42 | #include "xfs_dinode.h" | ||
43 | #include "xfs_dir2.h" | 40 | #include "xfs_dir2.h" |
44 | 41 | ||
45 | /* | 42 | /* |
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 62db83ab6cbc..a43d370d2c58 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
@@ -39,7 +37,6 @@ | |||
39 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
40 | #include "xfs_buf_item.h" | 38 | #include "xfs_buf_item.h" |
41 | #include "xfs_cksum.h" | 39 | #include "xfs_cksum.h" |
42 | #include "xfs_dinode.h" | ||
43 | #include "xfs_dir2.h" | 40 | #include "xfs_dir2.h" |
44 | 41 | ||
45 | STATIC int | 42 | STATIC int |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 281002689d64..22a5dcb70b32 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
@@ -42,7 +40,6 @@ | |||
42 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | 41 | #include "xfs_icache.h" |
44 | #include "xfs_log.h" | 42 | #include "xfs_log.h" |
45 | #include "xfs_dinode.h" | ||
46 | 43 | ||
47 | /* Kernel only BMAP related definitions and functions */ | 44 | /* Kernel only BMAP related definitions and functions */ |
48 | 45 | ||
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 24b4ebea0d4d..bb502a391792 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -34,18 +34,16 @@ | |||
34 | #include <linux/backing-dev.h> | 34 | #include <linux/backing-dev.h> |
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | 36 | ||
37 | #include "xfs_format.h" | ||
37 | #include "xfs_log_format.h" | 38 | #include "xfs_log_format.h" |
38 | #include "xfs_trans_resv.h" | 39 | #include "xfs_trans_resv.h" |
39 | #include "xfs_sb.h" | 40 | #include "xfs_sb.h" |
40 | #include "xfs_ag.h" | ||
41 | #include "xfs_mount.h" | 41 | #include "xfs_mount.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | #include "xfs_log.h" | 43 | #include "xfs_log.h" |
44 | 44 | ||
45 | static kmem_zone_t *xfs_buf_zone; | 45 | static kmem_zone_t *xfs_buf_zone; |
46 | 46 | ||
47 | static struct workqueue_struct *xfslogd_workqueue; | ||
48 | |||
49 | #ifdef XFS_BUF_LOCK_TRACKING | 47 | #ifdef XFS_BUF_LOCK_TRACKING |
50 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) | 48 | # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) |
51 | # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) | 49 | # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) |
@@ -463,7 +461,7 @@ _xfs_buf_find( | |||
463 | * have to check that the buffer falls within the filesystem bounds. | 461 | * have to check that the buffer falls within the filesystem bounds. |
464 | */ | 462 | */ |
465 | eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); | 463 | eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); |
466 | if (blkno >= eofs) { | 464 | if (blkno < 0 || blkno >= eofs) { |
467 | /* | 465 | /* |
468 | * XXX (dgc): we should really be returning -EFSCORRUPTED here, | 466 | * XXX (dgc): we should really be returning -EFSCORRUPTED here, |
469 | * but none of the higher level infrastructure supports | 467 | * but none of the higher level infrastructure supports |
@@ -1043,7 +1041,7 @@ xfs_buf_ioend_work( | |||
1043 | struct work_struct *work) | 1041 | struct work_struct *work) |
1044 | { | 1042 | { |
1045 | struct xfs_buf *bp = | 1043 | struct xfs_buf *bp = |
1046 | container_of(work, xfs_buf_t, b_iodone_work); | 1044 | container_of(work, xfs_buf_t, b_ioend_work); |
1047 | 1045 | ||
1048 | xfs_buf_ioend(bp); | 1046 | xfs_buf_ioend(bp); |
1049 | } | 1047 | } |
@@ -1052,8 +1050,8 @@ void | |||
1052 | xfs_buf_ioend_async( | 1050 | xfs_buf_ioend_async( |
1053 | struct xfs_buf *bp) | 1051 | struct xfs_buf *bp) |
1054 | { | 1052 | { |
1055 | INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); | 1053 | INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); |
1056 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); | 1054 | queue_work(bp->b_ioend_wq, &bp->b_ioend_work); |
1057 | } | 1055 | } |
1058 | 1056 | ||
1059 | void | 1057 | void |
@@ -1222,6 +1220,13 @@ _xfs_buf_ioapply( | |||
1222 | */ | 1220 | */ |
1223 | bp->b_error = 0; | 1221 | bp->b_error = 0; |
1224 | 1222 | ||
1223 | /* | ||
1224 | * Initialize the I/O completion workqueue if we haven't yet or the | ||
1225 | * submitter has not opted to specify a custom one. | ||
1226 | */ | ||
1227 | if (!bp->b_ioend_wq) | ||
1228 | bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; | ||
1229 | |||
1225 | if (bp->b_flags & XBF_WRITE) { | 1230 | if (bp->b_flags & XBF_WRITE) { |
1226 | if (bp->b_flags & XBF_SYNCIO) | 1231 | if (bp->b_flags & XBF_SYNCIO) |
1227 | rw = WRITE_SYNC; | 1232 | rw = WRITE_SYNC; |
@@ -1882,15 +1887,8 @@ xfs_buf_init(void) | |||
1882 | if (!xfs_buf_zone) | 1887 | if (!xfs_buf_zone) |
1883 | goto out; | 1888 | goto out; |
1884 | 1889 | ||
1885 | xfslogd_workqueue = alloc_workqueue("xfslogd", | ||
1886 | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1); | ||
1887 | if (!xfslogd_workqueue) | ||
1888 | goto out_free_buf_zone; | ||
1889 | |||
1890 | return 0; | 1890 | return 0; |
1891 | 1891 | ||
1892 | out_free_buf_zone: | ||
1893 | kmem_zone_destroy(xfs_buf_zone); | ||
1894 | out: | 1892 | out: |
1895 | return -ENOMEM; | 1893 | return -ENOMEM; |
1896 | } | 1894 | } |
@@ -1898,6 +1896,5 @@ xfs_buf_init(void) | |||
1898 | void | 1896 | void |
1899 | xfs_buf_terminate(void) | 1897 | xfs_buf_terminate(void) |
1900 | { | 1898 | { |
1901 | destroy_workqueue(xfslogd_workqueue); | ||
1902 | kmem_zone_destroy(xfs_buf_zone); | 1899 | kmem_zone_destroy(xfs_buf_zone); |
1903 | } | 1900 | } |
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 82002c00af90..75ff5d5a7d2e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -164,7 +164,8 @@ typedef struct xfs_buf { | |||
164 | struct xfs_perag *b_pag; /* contains rbtree root */ | 164 | struct xfs_perag *b_pag; /* contains rbtree root */ |
165 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 165 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
166 | void *b_addr; /* virtual address of buffer */ | 166 | void *b_addr; /* virtual address of buffer */ |
167 | struct work_struct b_iodone_work; | 167 | struct work_struct b_ioend_work; |
168 | struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ | ||
168 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 169 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
169 | struct completion b_iowait; /* queue for I/O waiters */ | 170 | struct completion b_iowait; /* queue for I/O waiters */ |
170 | void *b_fspriv; | 171 | void *b_fspriv; |
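The xfs_buf.c and xfs_buf.h hunks above replace the single global xfslogd workqueue with a per-buffer completion workqueue that falls back to a mount-wide default. A minimal kernel-style sketch of that pattern; all demo_ names are invented for illustration and are not the actual xfs_buf code.

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_buf {
	struct work_struct		end_work;
	struct workqueue_struct		*end_wq;	/* optional, submitter-chosen */
};

static struct workqueue_struct	*demo_default_wq;	/* e.g. a per-mount queue */

static void
demo_buf_end_work(
	struct work_struct	*work)
{
	struct demo_buf		*bp = container_of(work, struct demo_buf,
						   end_work);

	/* ... run the I/O completion processing for bp here ... */
	(void)bp;
}

static void
demo_buf_end_async(
	struct demo_buf	*bp)
{
	/* fall back to the default queue if the submitter did not pick one */
	if (!bp->end_wq)
		bp->end_wq = demo_default_wq;

	INIT_WORK(&bp->end_work, demo_buf_end_work);
	queue_work(bp->end_wq, &bp->end_work);
}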
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f15969543326..3f9bd58edec7 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -17,11 +17,11 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_format.h" | ||
20 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
22 | #include "xfs_bit.h" | 23 | #include "xfs_bit.h" |
23 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
26 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
27 | #include "xfs_buf_item.h" | 27 | #include "xfs_buf_item.h" |
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index f1b69edcdf31..098cd78fe708 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
@@ -34,7 +32,6 @@ | |||
34 | #include "xfs_trace.h" | 32 | #include "xfs_trace.h" |
35 | #include "xfs_bmap.h" | 33 | #include "xfs_bmap.h" |
36 | #include "xfs_trans.h" | 34 | #include "xfs_trans.h" |
37 | #include "xfs_dinode.h" | ||
38 | 35 | ||
39 | /* | 36 | /* |
40 | * Directory file type support functions | 37 | * Directory file type support functions |
@@ -44,7 +41,7 @@ static unsigned char xfs_dir3_filetype_table[] = { | |||
44 | DT_FIFO, DT_SOCK, DT_LNK, DT_WHT, | 41 | DT_FIFO, DT_SOCK, DT_LNK, DT_WHT, |
45 | }; | 42 | }; |
46 | 43 | ||
47 | unsigned char | 44 | static unsigned char |
48 | xfs_dir3_get_dtype( | 45 | xfs_dir3_get_dtype( |
49 | struct xfs_mount *mp, | 46 | struct xfs_mount *mp, |
50 | __uint8_t filetype) | 47 | __uint8_t filetype) |
@@ -57,22 +54,6 @@ xfs_dir3_get_dtype( | |||
57 | 54 | ||
58 | return xfs_dir3_filetype_table[filetype]; | 55 | return xfs_dir3_filetype_table[filetype]; |
59 | } | 56 | } |
60 | /* | ||
61 | * @mode, if set, indicates that the type field needs to be set up. | ||
62 | * This uses the transformation from file mode to DT_* as defined in linux/fs.h | ||
63 | * for file type specification. This will be propagated into the directory | ||
64 | * structure if appropriate for the given operation and filesystem config. | ||
65 | */ | ||
66 | const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { | ||
67 | [0] = XFS_DIR3_FT_UNKNOWN, | ||
68 | [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, | ||
69 | [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, | ||
70 | [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, | ||
71 | [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, | ||
72 | [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, | ||
73 | [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, | ||
74 | [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, | ||
75 | }; | ||
76 | 57 | ||
77 | STATIC int | 58 | STATIC int |
78 | xfs_dir2_sf_getdents( | 59 | xfs_dir2_sf_getdents( |
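The table removed above is indexed by the S_IFMT bits of an inode mode shifted down by S_SHIFT. A standalone illustration of that indexing scheme, with demo names standing in for the XFS_DIR3_FT_* values:

#include <sys/stat.h>
#include <stdio.h>

#define DEMO_S_SHIFT	12	/* same shift the XFS table uses */

static const char *demo_ftype[16] = {
	[S_IFREG  >> DEMO_S_SHIFT] = "regular file",
	[S_IFDIR  >> DEMO_S_SHIFT] = "directory",
	[S_IFLNK  >> DEMO_S_SHIFT] = "symlink",
	[S_IFCHR  >> DEMO_S_SHIFT] = "char device",
	[S_IFBLK  >> DEMO_S_SHIFT] = "block device",
	[S_IFIFO  >> DEMO_S_SHIFT] = "fifo",
	[S_IFSOCK >> DEMO_S_SHIFT] = "socket",
};

int main(void)
{
	mode_t mode = S_IFDIR | 0755;

	printf("%s\n", demo_ftype[(mode & S_IFMT) >> DEMO_S_SHIFT]);
	return 0;
}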
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 13d08a1b390e..799e5a2d334d 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_log_format.h" | 20 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 21 | #include "xfs_trans_resv.h" |
22 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
25 | #include "xfs_quota.h" | 24 | #include "xfs_quota.h" |
26 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 63c2de49f61d..02c01bbbc789 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_shared.h" | 22 | #include "xfs_shared.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_bmap.h" | 27 | #include "xfs_bmap.h" |
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index f33fbaaa4d8a..814cff94e78f 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_inode.h" | 24 | #include "xfs_inode.h" |
27 | #include "xfs_quota.h" | 25 | #include "xfs_quota.h" |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index b92fd7bc49e3..3ee186ac1093 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_error.h" | 24 | #include "xfs_error.h" |
27 | 25 | ||
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 5a6bd5d8779a..5eb4a14e0a0f 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -19,10 +19,9 @@ | |||
19 | #include "xfs_format.h" | 19 | #include "xfs_format.h" |
20 | #include "xfs_log_format.h" | 20 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 21 | #include "xfs_trans_resv.h" |
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
25 | #include "xfs_da_format.h" | 23 | #include "xfs_da_format.h" |
24 | #include "xfs_da_btree.h" | ||
26 | #include "xfs_dir2.h" | 25 | #include "xfs_dir2.h" |
27 | #include "xfs_export.h" | 26 | #include "xfs_export.h" |
28 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index fd22f69049d4..c263e079273e 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include "xfs_shared.h" | 24 | #include "xfs_shared.h" |
25 | #include "xfs_trans_resv.h" | 25 | #include "xfs_trans_resv.h" |
26 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
29 | #include "xfs_alloc.h" | 28 | #include "xfs_alloc.h" |
30 | #include "xfs_extent_busy.h" | 29 | #include "xfs_extent_busy.h" |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index c4327419dc5c..cb7fe64cdbfa 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -17,10 +17,9 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_format.h" | ||
20 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
26 | #include "xfs_trans_priv.h" | 25 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index eb596b419942..13e974e6a889 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 26 | #include "xfs_da_btree.h" |
@@ -37,7 +35,6 @@ | |||
37 | #include "xfs_ioctl.h" | 35 | #include "xfs_ioctl.h" |
38 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
39 | #include "xfs_log.h" | 37 | #include "xfs_log.h" |
40 | #include "xfs_dinode.h" | ||
41 | #include "xfs_icache.h" | 38 | #include "xfs_icache.h" |
42 | 39 | ||
43 | #include <linux/aio.h> | 40 | #include <linux/aio.h> |
@@ -933,7 +930,6 @@ xfs_file_readdir( | |||
933 | { | 930 | { |
934 | struct inode *inode = file_inode(file); | 931 | struct inode *inode = file_inode(file); |
935 | xfs_inode_t *ip = XFS_I(inode); | 932 | xfs_inode_t *ip = XFS_I(inode); |
936 | int error; | ||
937 | size_t bufsize; | 933 | size_t bufsize; |
938 | 934 | ||
939 | /* | 935 | /* |
@@ -950,10 +946,7 @@ xfs_file_readdir( | |||
950 | */ | 946 | */ |
951 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); | 947 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); |
952 | 948 | ||
953 | error = xfs_readdir(ip, ctx, bufsize); | 949 | return xfs_readdir(ip, ctx, bufsize); |
954 | if (error) | ||
955 | return error; | ||
956 | return 0; | ||
957 | } | 950 | } |
958 | 951 | ||
959 | STATIC int | 952 | STATIC int |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index e92730c1d3ca..a2e86e8a0fea 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -20,16 +20,13 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
26 | #include "xfs_inum.h" | ||
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_bmap.h" | 26 | #include "xfs_bmap.h" |
29 | #include "xfs_bmap_util.h" | 27 | #include "xfs_bmap_util.h" |
30 | #include "xfs_alloc.h" | 28 | #include "xfs_alloc.h" |
31 | #include "xfs_mru_cache.h" | 29 | #include "xfs_mru_cache.h" |
32 | #include "xfs_dinode.h" | ||
33 | #include "xfs_filestream.h" | 30 | #include "xfs_filestream.h" |
34 | #include "xfs_trace.h" | 31 | #include "xfs_trace.h" |
35 | 32 | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c05ac8b70fa9..fdc64220fcb0 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | 27 | #include "xfs_da_btree.h" |
@@ -40,7 +39,6 @@ | |||
40 | #include "xfs_rtalloc.h" | 39 | #include "xfs_rtalloc.h" |
41 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
42 | #include "xfs_log.h" | 41 | #include "xfs_log.h" |
43 | #include "xfs_dinode.h" | ||
44 | #include "xfs_filestream.h" | 42 | #include "xfs_filestream.h" |
45 | 43 | ||
46 | /* | 44 | /* |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index b45f7b27b5df..9771b7ef62ed 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -20,9 +20,7 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_error.h" | 26 | #include "xfs_error.h" |
@@ -65,6 +63,7 @@ xfs_inode_alloc( | |||
65 | return NULL; | 63 | return NULL; |
66 | } | 64 | } |
67 | 65 | ||
66 | XFS_STATS_INC(vn_active); | ||
68 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 67 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
69 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 68 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
70 | ASSERT(!xfs_isiflocked(ip)); | 69 | ASSERT(!xfs_isiflocked(ip)); |
@@ -130,6 +129,7 @@ xfs_inode_free( | |||
130 | /* asserts to verify all state is correct here */ | 129 | /* asserts to verify all state is correct here */ |
131 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 130 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
132 | ASSERT(!xfs_isiflocked(ip)); | 131 | ASSERT(!xfs_isiflocked(ip)); |
132 | XFS_STATS_DEC(vn_active); | ||
133 | 133 | ||
134 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | 134 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); |
135 | } | 135 | } |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 46748b86b12f..62f1f91c32cb 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -34,6 +34,14 @@ struct xfs_eofblocks { | |||
34 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 34 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * tags for inode radix tree | ||
38 | */ | ||
39 | #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup | ||
40 | in xfs_inode_ag_iterator */ | ||
41 | #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ | ||
42 | #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ | ||
43 | |||
44 | /* | ||
37 | * Flags for xfs_iget() | 45 | * Flags for xfs_iget() |
38 | */ | 46 | */ |
39 | #define XFS_IGET_CREATE 0x1 | 47 | #define XFS_IGET_CREATE 0x1 |
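The tag values for the per-AG inode radix tree were previously private to fs/xfs/xfs_icache.c; exporting them from xfs_icache.h lets other code tag or walk cached inodes directly. A minimal sketch of how such a tag might be set, assuming the kernel radix-tree API and the pag_ici_lock/pag_ici_root fields of struct xfs_perag; the helper name is hypothetical and the includes are abbreviated:

#include <linux/radix-tree.h>
#include "xfs_icache.h"

/* Hypothetical helper: mark one cached inode for reclaim in its AG tree. */
static void example_mark_inode_reclaimable(struct xfs_perag *pag,
					   xfs_agino_t agino)
{
	spin_lock(&pag->pag_ici_lock);
	radix_tree_tag_set(&pag->pag_ici_root, agino, XFS_ICI_RECLAIM_TAG);
	spin_unlock(&pag->pag_ici_lock);
}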
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 7e4549233251..d45ca72af6fb 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c | |||
@@ -18,11 +18,10 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_shared.h" | 20 | #include "xfs_shared.h" |
21 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
23 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
27 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
28 | #include "xfs_trans_priv.h" | 27 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8ed049d1e332..41f804e740d7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -23,9 +23,7 @@ | |||
23 | #include "xfs_format.h" | 23 | #include "xfs_format.h" |
24 | #include "xfs_log_format.h" | 24 | #include "xfs_log_format.h" |
25 | #include "xfs_trans_resv.h" | 25 | #include "xfs_trans_resv.h" |
26 | #include "xfs_inum.h" | ||
27 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
28 | #include "xfs_ag.h" | ||
29 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
30 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
31 | #include "xfs_da_format.h" | 29 | #include "xfs_da_format.h" |
@@ -1082,7 +1080,7 @@ xfs_create( | |||
1082 | struct xfs_dquot *udqp = NULL; | 1080 | struct xfs_dquot *udqp = NULL; |
1083 | struct xfs_dquot *gdqp = NULL; | 1081 | struct xfs_dquot *gdqp = NULL; |
1084 | struct xfs_dquot *pdqp = NULL; | 1082 | struct xfs_dquot *pdqp = NULL; |
1085 | struct xfs_trans_res tres; | 1083 | struct xfs_trans_res *tres; |
1086 | uint resblks; | 1084 | uint resblks; |
1087 | 1085 | ||
1088 | trace_xfs_create(dp, name); | 1086 | trace_xfs_create(dp, name); |
@@ -1105,13 +1103,11 @@ xfs_create( | |||
1105 | if (is_dir) { | 1103 | if (is_dir) { |
1106 | rdev = 0; | 1104 | rdev = 0; |
1107 | resblks = XFS_MKDIR_SPACE_RES(mp, name->len); | 1105 | resblks = XFS_MKDIR_SPACE_RES(mp, name->len); |
1108 | tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres; | 1106 | tres = &M_RES(mp)->tr_mkdir; |
1109 | tres.tr_logcount = XFS_MKDIR_LOG_COUNT; | ||
1110 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | 1107 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); |
1111 | } else { | 1108 | } else { |
1112 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); | 1109 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); |
1113 | tres.tr_logres = M_RES(mp)->tr_create.tr_logres; | 1110 | tres = &M_RES(mp)->tr_create; |
1114 | tres.tr_logcount = XFS_CREATE_LOG_COUNT; | ||
1115 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | 1111 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); |
1116 | } | 1112 | } |
1117 | 1113 | ||
@@ -1123,17 +1119,16 @@ xfs_create( | |||
1123 | * the case we'll drop the one we have and get a more | 1119 | * the case we'll drop the one we have and get a more |
1124 | * appropriate transaction later. | 1120 | * appropriate transaction later. |
1125 | */ | 1121 | */ |
1126 | tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; | 1122 | error = xfs_trans_reserve(tp, tres, resblks, 0); |
1127 | error = xfs_trans_reserve(tp, &tres, resblks, 0); | ||
1128 | if (error == -ENOSPC) { | 1123 | if (error == -ENOSPC) { |
1129 | /* flush outstanding delalloc blocks and retry */ | 1124 | /* flush outstanding delalloc blocks and retry */ |
1130 | xfs_flush_inodes(mp); | 1125 | xfs_flush_inodes(mp); |
1131 | error = xfs_trans_reserve(tp, &tres, resblks, 0); | 1126 | error = xfs_trans_reserve(tp, tres, resblks, 0); |
1132 | } | 1127 | } |
1133 | if (error == -ENOSPC) { | 1128 | if (error == -ENOSPC) { |
1134 | /* No space at all so try a "no-allocation" reservation */ | 1129 | /* No space at all so try a "no-allocation" reservation */ |
1135 | resblks = 0; | 1130 | resblks = 0; |
1136 | error = xfs_trans_reserve(tp, &tres, 0, 0); | 1131 | error = xfs_trans_reserve(tp, tres, 0, 0); |
1137 | } | 1132 | } |
1138 | if (error) { | 1133 | if (error) { |
1139 | cancel_flags = 0; | 1134 | cancel_flags = 0; |
@@ -2488,9 +2483,7 @@ xfs_remove( | |||
2488 | xfs_fsblock_t first_block; | 2483 | xfs_fsblock_t first_block; |
2489 | int cancel_flags; | 2484 | int cancel_flags; |
2490 | int committed; | 2485 | int committed; |
2491 | int link_zero; | ||
2492 | uint resblks; | 2486 | uint resblks; |
2493 | uint log_count; | ||
2494 | 2487 | ||
2495 | trace_xfs_remove(dp, name); | 2488 | trace_xfs_remove(dp, name); |
2496 | 2489 | ||
@@ -2505,13 +2498,10 @@ xfs_remove( | |||
2505 | if (error) | 2498 | if (error) |
2506 | goto std_return; | 2499 | goto std_return; |
2507 | 2500 | ||
2508 | if (is_dir) { | 2501 | if (is_dir) |
2509 | tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); | 2502 | tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); |
2510 | log_count = XFS_DEFAULT_LOG_COUNT; | 2503 | else |
2511 | } else { | ||
2512 | tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); | 2504 | tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); |
2513 | log_count = XFS_REMOVE_LOG_COUNT; | ||
2514 | } | ||
2515 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 2505 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
2516 | 2506 | ||
2517 | /* | 2507 | /* |
@@ -2579,9 +2569,6 @@ xfs_remove( | |||
2579 | if (error) | 2569 | if (error) |
2580 | goto out_trans_cancel; | 2570 | goto out_trans_cancel; |
2581 | 2571 | ||
2582 | /* Determine if this is the last link while the inode is locked */ | ||
2583 | link_zero = (ip->i_d.di_nlink == 0); | ||
2584 | |||
2585 | xfs_bmap_init(&free_list, &first_block); | 2572 | xfs_bmap_init(&free_list, &first_block); |
2586 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, | 2573 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, |
2587 | &first_block, &free_list, resblks); | 2574 | &first_block, &free_list, resblks); |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9af2882e1f4c..4ed2ba9342dc 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -20,7 +20,6 @@ | |||
20 | 20 | ||
21 | #include "xfs_inode_buf.h" | 21 | #include "xfs_inode_buf.h" |
22 | #include "xfs_inode_fork.h" | 22 | #include "xfs_inode_fork.h" |
23 | #include "xfs_dinode.h" | ||
24 | 23 | ||
25 | /* | 24 | /* |
26 | * Kernel only inode definitions | 25 | * Kernel only inode definitions |
@@ -324,7 +323,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
324 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ | 323 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ |
325 | ((pip)->i_d.di_mode & S_ISGID)) | 324 | ((pip)->i_d.di_mode & S_ISGID)) |
326 | 325 | ||
327 | |||
328 | int xfs_release(struct xfs_inode *ip); | 326 | int xfs_release(struct xfs_inode *ip); |
329 | void xfs_inactive(struct xfs_inode *ip); | 327 | void xfs_inactive(struct xfs_inode *ip); |
330 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | 328 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 63de0b0acc32..bf13a5a7e2f4 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_inode.h" | 24 | #include "xfs_inode.h" |
27 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
@@ -29,7 +27,6 @@ | |||
29 | #include "xfs_error.h" | 27 | #include "xfs_error.h" |
30 | #include "xfs_trace.h" | 28 | #include "xfs_trace.h" |
31 | #include "xfs_trans_priv.h" | 29 | #include "xfs_trans_priv.h" |
32 | #include "xfs_dinode.h" | ||
33 | #include "xfs_log.h" | 30 | #include "xfs_log.h" |
34 | 31 | ||
35 | 32 | ||
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 24c926b6fe85..a1831980a68e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_ioctl.h" | 26 | #include "xfs_ioctl.h" |
@@ -40,7 +38,6 @@ | |||
40 | #include "xfs_trace.h" | 38 | #include "xfs_trace.h" |
41 | #include "xfs_icache.h" | 39 | #include "xfs_icache.h" |
42 | #include "xfs_symlink.h" | 40 | #include "xfs_symlink.h" |
43 | #include "xfs_dinode.h" | ||
44 | #include "xfs_trans.h" | 41 | #include "xfs_trans.h" |
45 | 42 | ||
46 | #include <linux/capability.h> | 43 | #include <linux/capability.h> |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 94ce027e28e3..ec6772866f3d 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -25,8 +25,6 @@ | |||
25 | #include "xfs_format.h" | 25 | #include "xfs_format.h" |
26 | #include "xfs_log_format.h" | 26 | #include "xfs_log_format.h" |
27 | #include "xfs_trans_resv.h" | 27 | #include "xfs_trans_resv.h" |
28 | #include "xfs_sb.h" | ||
29 | #include "xfs_ag.h" | ||
30 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
31 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
32 | #include "xfs_itable.h" | 30 | #include "xfs_itable.h" |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index afcf3c926565..c980e2a5086b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_btree.h" | 26 | #include "xfs_btree.h" |
@@ -38,7 +36,6 @@ | |||
38 | #include "xfs_quota.h" | 36 | #include "xfs_quota.h" |
39 | #include "xfs_dquot_item.h" | 37 | #include "xfs_dquot_item.h" |
40 | #include "xfs_dquot.h" | 38 | #include "xfs_dquot.h" |
41 | #include "xfs_dinode.h" | ||
42 | 39 | ||
43 | 40 | ||
44 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 41 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
@@ -52,7 +49,6 @@ xfs_iomap_eof_align_last_fsb( | |||
52 | xfs_extlen_t extsize, | 49 | xfs_extlen_t extsize, |
53 | xfs_fileoff_t *last_fsb) | 50 | xfs_fileoff_t *last_fsb) |
54 | { | 51 | { |
55 | xfs_fileoff_t new_last_fsb = 0; | ||
56 | xfs_extlen_t align = 0; | 52 | xfs_extlen_t align = 0; |
57 | int eof, error; | 53 | int eof, error; |
58 | 54 | ||
@@ -70,8 +66,8 @@ xfs_iomap_eof_align_last_fsb( | |||
70 | else if (mp->m_dalign) | 66 | else if (mp->m_dalign) |
71 | align = mp->m_dalign; | 67 | align = mp->m_dalign; |
72 | 68 | ||
73 | if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align)) | 69 | if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align)) |
74 | new_last_fsb = roundup_64(*last_fsb, align); | 70 | align = 0; |
75 | } | 71 | } |
76 | 72 | ||
77 | /* | 73 | /* |
@@ -79,14 +75,14 @@ xfs_iomap_eof_align_last_fsb( | |||
79 | * (when file on a real-time subvolume or has di_extsize hint). | 75 | * (when file on a real-time subvolume or has di_extsize hint). |
80 | */ | 76 | */ |
81 | if (extsize) { | 77 | if (extsize) { |
82 | if (new_last_fsb) | 78 | if (align) |
83 | align = roundup_64(new_last_fsb, extsize); | 79 | align = roundup_64(align, extsize); |
84 | else | 80 | else |
85 | align = extsize; | 81 | align = extsize; |
86 | new_last_fsb = roundup_64(*last_fsb, align); | ||
87 | } | 82 | } |
88 | 83 | ||
89 | if (new_last_fsb) { | 84 | if (align) { |
85 | xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); | ||
90 | error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); | 86 | error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); |
91 | if (error) | 87 | if (error) |
92 | return error; | 88 | return error; |
@@ -264,7 +260,6 @@ xfs_iomap_eof_want_preallocate( | |||
264 | { | 260 | { |
265 | xfs_fileoff_t start_fsb; | 261 | xfs_fileoff_t start_fsb; |
266 | xfs_filblks_t count_fsb; | 262 | xfs_filblks_t count_fsb; |
267 | xfs_fsblock_t firstblock; | ||
268 | int n, error, imaps; | 263 | int n, error, imaps; |
269 | int found_delalloc = 0; | 264 | int found_delalloc = 0; |
270 | 265 | ||
@@ -289,7 +284,6 @@ xfs_iomap_eof_want_preallocate( | |||
289 | count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); | 284 | count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
290 | while (count_fsb > 0) { | 285 | while (count_fsb > 0) { |
291 | imaps = nimaps; | 286 | imaps = nimaps; |
292 | firstblock = NULLFSBLOCK; | ||
293 | error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, | 287 | error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, |
294 | 0); | 288 | 0); |
295 | if (error) | 289 | if (error) |
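The rework above folds the two rounding paths of xfs_iomap_eof_align_last_fsb() into a single align value that is computed first and applied once. A minimal sketch of the resulting arithmetic, assuming the roundup_64() helper from xfs_linux.h; this is an illustration, not the kernel function:

/* Illustrative distillation of the post-change alignment logic. */
static xfs_fileoff_t example_align_eof(xfs_fileoff_t last_fsb,
				       xfs_extlen_t align,	/* stripe/allocsize alignment, may be 0 */
				       xfs_extlen_t extsize)	/* extent size hint, may be 0 */
{
	if (extsize)
		align = align ? roundup_64(align, extsize) : extsize;
	/* the real code still asks xfs_bmap_eof() before using the result */
	return align ? roundup_64(last_fsb, align) : last_fsb;
}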
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ec6dcdc181ee..c50311cae1b1 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
@@ -37,8 +35,7 @@ | |||
37 | #include "xfs_icache.h" | 35 | #include "xfs_icache.h" |
38 | #include "xfs_symlink.h" | 36 | #include "xfs_symlink.h" |
39 | #include "xfs_da_btree.h" | 37 | #include "xfs_da_btree.h" |
40 | #include "xfs_dir2_priv.h" | 38 | #include "xfs_dir2.h" |
41 | #include "xfs_dinode.h" | ||
42 | #include "xfs_trans_space.h" | 39 | #include "xfs_trans_space.h" |
43 | 40 | ||
44 | #include <linux/capability.h> | 41 | #include <linux/capability.h> |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 894924a5129b..82e314258f73 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -21,9 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_inum.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
29 | #include "xfs_btree.h" | 26 | #include "xfs_btree.h" |
@@ -33,7 +30,6 @@ | |||
33 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
34 | #include "xfs_trace.h" | 31 | #include "xfs_trace.h" |
35 | #include "xfs_icache.h" | 32 | #include "xfs_icache.h" |
36 | #include "xfs_dinode.h" | ||
37 | 33 | ||
38 | STATIC int | 34 | STATIC int |
39 | xfs_internal_inum( | 35 | xfs_internal_inum( |
@@ -352,7 +348,6 @@ xfs_bulkstat( | |||
352 | int *done) /* 1 if there are more stats to get */ | 348 | int *done) /* 1 if there are more stats to get */ |
353 | { | 349 | { |
354 | xfs_buf_t *agbp; /* agi header buffer */ | 350 | xfs_buf_t *agbp; /* agi header buffer */ |
355 | xfs_agi_t *agi; /* agi header data */ | ||
356 | xfs_agino_t agino; /* inode # in allocation group */ | 351 | xfs_agino_t agino; /* inode # in allocation group */ |
357 | xfs_agnumber_t agno; /* allocation group number */ | 352 | xfs_agnumber_t agno; /* allocation group number */ |
358 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 353 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
@@ -403,7 +398,6 @@ xfs_bulkstat( | |||
403 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 398 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
404 | if (error) | 399 | if (error) |
405 | break; | 400 | break; |
406 | agi = XFS_BUF_TO_AGI(agbp); | ||
407 | /* | 401 | /* |
408 | * Allocate and initialize a btree cursor for ialloc btree. | 402 | * Allocate and initialize a btree cursor for ialloc btree. |
409 | */ | 403 | */ |
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 6a51619d8690..c31d2c2eadc4 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -384,4 +384,10 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) | |||
384 | #endif /* XFS_WARN */ | 384 | #endif /* XFS_WARN */ |
385 | #endif /* DEBUG */ | 385 | #endif /* DEBUG */ |
386 | 386 | ||
387 | #ifdef CONFIG_XFS_RT | ||
388 | #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) | ||
389 | #else | ||
390 | #define XFS_IS_REALTIME_INODE(ip) (0) | ||
391 | #endif | ||
392 | |||
387 | #endif /* __XFS_LINUX__ */ | 393 | #endif /* __XFS_LINUX__ */ |
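Defining XFS_IS_REALTIME_INODE() to 0 when CONFIG_XFS_RT is disabled lets callers test the realtime flag unconditionally and have the dead branch discarded at compile time. A small hypothetical caller as illustration (the function and its policy are made up):

/* Hypothetical: with CONFIG_XFS_RT=n the first branch compiles away. */
static xfs_extlen_t example_pick_extsize(struct xfs_inode *ip)
{
	if (XFS_IS_REALTIME_INODE(ip))
		return ip->i_d.di_extsize;	/* realtime files use the rt extent hint */
	return ip->i_mount->m_dalign;		/* otherwise fall back to the stripe unit */
}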
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fe88ef67f93a..e408bf5a3ff7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_error.h" | 25 | #include "xfs_error.h" |
28 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
@@ -1031,7 +1029,7 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
1031 | struct xlog *log = mp->m_log; | 1029 | struct xlog *log = mp->m_log; |
1032 | int needed = 0; | 1030 | int needed = 0; |
1033 | 1031 | ||
1034 | if (!xfs_fs_writable(mp)) | 1032 | if (!xfs_fs_writable(mp, SB_FREEZE_WRITE)) |
1035 | return 0; | 1033 | return 0; |
1036 | 1034 | ||
1037 | if (!xlog_cil_empty(log)) | 1035 | if (!xlog_cil_empty(log)) |
@@ -1808,6 +1806,8 @@ xlog_sync( | |||
1808 | XFS_BUF_ZEROFLAGS(bp); | 1806 | XFS_BUF_ZEROFLAGS(bp); |
1809 | XFS_BUF_ASYNC(bp); | 1807 | XFS_BUF_ASYNC(bp); |
1810 | bp->b_flags |= XBF_SYNCIO; | 1808 | bp->b_flags |= XBF_SYNCIO; |
1809 | /* use high priority completion wq */ | ||
1810 | bp->b_ioend_wq = log->l_mp->m_log_workqueue; | ||
1811 | 1811 | ||
1812 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { | 1812 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { |
1813 | bp->b_flags |= XBF_FUA; | 1813 | bp->b_flags |= XBF_FUA; |
@@ -1856,6 +1856,8 @@ xlog_sync( | |||
1856 | bp->b_flags |= XBF_SYNCIO; | 1856 | bp->b_flags |= XBF_SYNCIO; |
1857 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1857 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) |
1858 | bp->b_flags |= XBF_FUA; | 1858 | bp->b_flags |= XBF_FUA; |
1859 | /* use high priority completion wq */ | ||
1860 | bp->b_ioend_wq = log->l_mp->m_log_workqueue; | ||
1859 | 1861 | ||
1860 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1862 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
1861 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); | 1863 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f506c457011e..45cc0ce18adf 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -17,11 +17,10 @@ | |||
17 | 17 | ||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_format.h" | ||
20 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
21 | #include "xfs_shared.h" | 22 | #include "xfs_shared.h" |
22 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
26 | #include "xfs_error.h" | 25 | #include "xfs_error.h" |
27 | #include "xfs_alloc.h" | 26 | #include "xfs_alloc.h" |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 00cd7f3a8f59..a5a945fc3bdc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -22,11 +22,10 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | ||
30 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
31 | #include "xfs_trans.h" | 30 | #include "xfs_trans.h" |
32 | #include "xfs_log.h" | 31 | #include "xfs_log.h" |
@@ -42,7 +41,6 @@ | |||
42 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | 42 | #include "xfs_icache.h" |
44 | #include "xfs_bmap_btree.h" | 43 | #include "xfs_bmap_btree.h" |
45 | #include "xfs_dinode.h" | ||
46 | #include "xfs_error.h" | 44 | #include "xfs_error.h" |
47 | #include "xfs_dir2.h" | 45 | #include "xfs_dir2.h" |
48 | 46 | ||
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 63ca2f0420b1..d8b67547ab34 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c | |||
@@ -17,10 +17,9 @@ | |||
17 | 17 | ||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_format.h" | ||
20 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
25 | 24 | ||
26 | /* | 25 | /* |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 51435dbce9c4..d3d38836f87f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -22,11 +22,10 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
28 | #include "xfs_da_btree.h" | ||
30 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
31 | #include "xfs_dir2.h" | 30 | #include "xfs_dir2.h" |
32 | #include "xfs_ialloc.h" | 31 | #include "xfs_ialloc.h" |
@@ -41,7 +40,6 @@ | |||
41 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
42 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | 42 | #include "xfs_icache.h" |
44 | #include "xfs_dinode.h" | ||
45 | #include "xfs_sysfs.h" | 43 | #include "xfs_sysfs.h" |
46 | 44 | ||
47 | 45 | ||
@@ -1074,11 +1072,23 @@ xfs_unmountfs( | |||
1074 | xfs_sysfs_del(&mp->m_kobj); | 1072 | xfs_sysfs_del(&mp->m_kobj); |
1075 | } | 1073 | } |
1076 | 1074 | ||
1077 | int | 1075 | /* |
1078 | xfs_fs_writable(xfs_mount_t *mp) | 1076 | * Determine whether modifications can proceed. The caller specifies the minimum |
1077 | * freeze level for which modifications should not be allowed. This allows | ||
1078 | * certain operations to proceed while the freeze sequence is in progress, if | ||
1079 | * necessary. | ||
1080 | */ | ||
1081 | bool | ||
1082 | xfs_fs_writable( | ||
1083 | struct xfs_mount *mp, | ||
1084 | int level) | ||
1079 | { | 1085 | { |
1080 | return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) || | 1086 | ASSERT(level > SB_UNFROZEN); |
1081 | (mp->m_flags & XFS_MOUNT_RDONLY)); | 1087 | if ((mp->m_super->s_writers.frozen >= level) || |
1088 | XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY)) | ||
1089 | return false; | ||
1090 | |||
1091 | return true; | ||
1082 | } | 1092 | } |
1083 | 1093 | ||
1084 | /* | 1094 | /* |
@@ -1086,9 +1096,9 @@ xfs_fs_writable(xfs_mount_t *mp) | |||
1086 | * | 1096 | * |
1087 | * Sync the superblock counters to disk. | 1097 | * Sync the superblock counters to disk. |
1088 | * | 1098 | * |
1089 | * Note this code can be called during the process of freezing, so | 1099 | * Note this code can be called during the process of freezing, so we use the |
1090 | * we may need to use the transaction allocator which does not | 1100 | * transaction allocator that does not block when the transaction subsystem is |
1091 | * block when the transaction subsystem is in its frozen state. | 1101 | * in its frozen state. |
1092 | */ | 1102 | */ |
1093 | int | 1103 | int |
1094 | xfs_log_sbcount(xfs_mount_t *mp) | 1104 | xfs_log_sbcount(xfs_mount_t *mp) |
@@ -1096,7 +1106,8 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
1096 | xfs_trans_t *tp; | 1106 | xfs_trans_t *tp; |
1097 | int error; | 1107 | int error; |
1098 | 1108 | ||
1099 | if (!xfs_fs_writable(mp)) | 1109 | /* allow this to proceed during the freeze sequence... */ |
1110 | if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) | ||
1100 | return 0; | 1111 | return 0; |
1101 | 1112 | ||
1102 | xfs_icsb_sync_counters(mp, 0); | 1113 | xfs_icsb_sync_counters(mp, 0); |
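xfs_fs_writable() now takes the minimum freeze level at which modifications must be refused, so different callers can pick different cut-offs: xfs_log_need_covered() above passes SB_FREEZE_WRITE and stops as soon as a freeze begins, while xfs_log_sbcount() passes SB_FREEZE_COMPLETE and keeps logging superblock counters until the freeze has fully completed. A minimal sketch of a hypothetical caller, assuming the freeze levels from linux/fs.h:

#include <linux/fs.h>	/* SB_FREEZE_WRITE, SB_FREEZE_COMPLETE */

/* Hypothetical background worker: bail out once a freeze is underway. */
static void example_background_flush(struct xfs_mount *mp)
{
	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return;		/* frozen, shut down, or mounted read-only */
	/* ... work that would dirty the filesystem goes here ... */
}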
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b0447c86e7e2..22ccf69d4d3c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -168,6 +168,7 @@ typedef struct xfs_mount { | |||
168 | /* low free space thresholds */ | 168 | /* low free space thresholds */ |
169 | struct xfs_kobj m_kobj; | 169 | struct xfs_kobj m_kobj; |
170 | 170 | ||
171 | struct workqueue_struct *m_buf_workqueue; | ||
171 | struct workqueue_struct *m_data_workqueue; | 172 | struct workqueue_struct *m_data_workqueue; |
172 | struct workqueue_struct *m_unwritten_workqueue; | 173 | struct workqueue_struct *m_unwritten_workqueue; |
173 | struct workqueue_struct *m_cil_workqueue; | 174 | struct workqueue_struct *m_cil_workqueue; |
@@ -320,10 +321,7 @@ typedef struct xfs_mod_sb { | |||
320 | 321 | ||
321 | /* | 322 | /* |
322 | * Per-ag incore structure, copies of information in agf and agi, to improve the | 323 | * Per-ag incore structure, copies of information in agf and agi, to improve the |
323 | * performance of allocation group selection. This is defined for the kernel | 324 | * performance of allocation group selection. |
324 | * only, and hence is defined here instead of in xfs_ag.h. You need the struct | ||
325 | * xfs_mount to be defined to look up a xfs_perag anyway (via mp->m_perag_tree), | ||
326 | * so this doesn't introduce any strange header file dependencies. | ||
327 | */ | 325 | */ |
328 | typedef struct xfs_perag { | 326 | typedef struct xfs_perag { |
329 | struct xfs_mount *pag_mount; /* owner filesystem */ | 327 | struct xfs_mount *pag_mount; /* owner filesystem */ |
@@ -384,7 +382,7 @@ extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); | |||
384 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); | 382 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); |
385 | extern int xfs_readsb(xfs_mount_t *, int); | 383 | extern int xfs_readsb(xfs_mount_t *, int); |
386 | extern void xfs_freesb(xfs_mount_t *); | 384 | extern void xfs_freesb(xfs_mount_t *); |
387 | extern int xfs_fs_writable(xfs_mount_t *); | 385 | extern bool xfs_fs_writable(struct xfs_mount *mp, int level); |
388 | extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | 386 | extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); |
389 | 387 | ||
390 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 388 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index d68f23021af3..79fb19dd9c83 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
29 | #include "xfs_ialloc.h" | 28 | #include "xfs_ialloc.h" |
@@ -38,7 +37,6 @@ | |||
38 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
39 | #include "xfs_icache.h" | 38 | #include "xfs_icache.h" |
40 | #include "xfs_cksum.h" | 39 | #include "xfs_cksum.h" |
41 | #include "xfs_dinode.h" | ||
42 | 40 | ||
43 | /* | 41 | /* |
44 | * The global quota manager. There is only one of these for the entire | 42 | * The global quota manager. There is only one of these for the entire |
@@ -1749,23 +1747,21 @@ xfs_qm_vop_dqalloc( | |||
1749 | xfs_iunlock(ip, lockflags); | 1747 | xfs_iunlock(ip, lockflags); |
1750 | if (O_udqpp) | 1748 | if (O_udqpp) |
1751 | *O_udqpp = uq; | 1749 | *O_udqpp = uq; |
1752 | else if (uq) | 1750 | else |
1753 | xfs_qm_dqrele(uq); | 1751 | xfs_qm_dqrele(uq); |
1754 | if (O_gdqpp) | 1752 | if (O_gdqpp) |
1755 | *O_gdqpp = gq; | 1753 | *O_gdqpp = gq; |
1756 | else if (gq) | 1754 | else |
1757 | xfs_qm_dqrele(gq); | 1755 | xfs_qm_dqrele(gq); |
1758 | if (O_pdqpp) | 1756 | if (O_pdqpp) |
1759 | *O_pdqpp = pq; | 1757 | *O_pdqpp = pq; |
1760 | else if (pq) | 1758 | else |
1761 | xfs_qm_dqrele(pq); | 1759 | xfs_qm_dqrele(pq); |
1762 | return 0; | 1760 | return 0; |
1763 | 1761 | ||
1764 | error_rele: | 1762 | error_rele: |
1765 | if (gq) | 1763 | xfs_qm_dqrele(gq); |
1766 | xfs_qm_dqrele(gq); | 1764 | xfs_qm_dqrele(uq); |
1767 | if (uq) | ||
1768 | xfs_qm_dqrele(uq); | ||
1769 | return error; | 1765 | return error; |
1770 | } | 1766 | } |
1771 | 1767 | ||
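The simplified error path above leans on xfs_qm_dqrele() being safe to call with a NULL dquot, so the explicit NULL checks can go. A minimal sketch of the convention the callers now rely on (the helper name is illustrative):

/* Hypothetical helper: releasing in bulk, NULL entries are simply ignored. */
static void example_release_dquots(struct xfs_dquot *uq, struct xfs_dquot *gq,
				   struct xfs_dquot *pq)
{
	xfs_qm_dqrele(uq);	/* each call returns immediately for NULL */
	xfs_qm_dqrele(gq);
	xfs_qm_dqrele(pq);
}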
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 2c61e61b0205..3e52d5de7ae1 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_quota.h" | 23 | #include "xfs_quota.h" |
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 80f2d77d929a..74fca68e43b6 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include "xfs_trans_resv.h" | 26 | #include "xfs_trans_resv.h" |
27 | #include "xfs_bit.h" | 27 | #include "xfs_bit.h" |
28 | #include "xfs_sb.h" | 28 | #include "xfs_sb.h" |
29 | #include "xfs_ag.h" | ||
30 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
31 | #include "xfs_inode.h" | 30 | #include "xfs_inode.h" |
32 | #include "xfs_trans.h" | 31 | #include "xfs_trans.h" |
@@ -784,19 +783,21 @@ xfs_qm_log_quotaoff( | |||
784 | { | 783 | { |
785 | xfs_trans_t *tp; | 784 | xfs_trans_t *tp; |
786 | int error; | 785 | int error; |
787 | xfs_qoff_logitem_t *qoffi=NULL; | 786 | xfs_qoff_logitem_t *qoffi; |
788 | uint oldsbqflag=0; | 787 | |
788 | *qoffstartp = NULL; | ||
789 | 789 | ||
790 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); | 790 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); |
791 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); | 791 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); |
792 | if (error) | 792 | if (error) { |
793 | goto error0; | 793 | xfs_trans_cancel(tp, 0); |
794 | goto out; | ||
795 | } | ||
794 | 796 | ||
795 | qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); | 797 | qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); |
796 | xfs_trans_log_quotaoff_item(tp, qoffi); | 798 | xfs_trans_log_quotaoff_item(tp, qoffi); |
797 | 799 | ||
798 | spin_lock(&mp->m_sb_lock); | 800 | spin_lock(&mp->m_sb_lock); |
799 | oldsbqflag = mp->m_sb.sb_qflags; | ||
800 | mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; | 801 | mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; |
801 | spin_unlock(&mp->m_sb_lock); | 802 | spin_unlock(&mp->m_sb_lock); |
802 | 803 | ||
@@ -809,19 +810,11 @@ xfs_qm_log_quotaoff( | |||
809 | */ | 810 | */ |
810 | xfs_trans_set_sync(tp); | 811 | xfs_trans_set_sync(tp); |
811 | error = xfs_trans_commit(tp, 0); | 812 | error = xfs_trans_commit(tp, 0); |
813 | if (error) | ||
814 | goto out; | ||
812 | 815 | ||
813 | error0: | ||
814 | if (error) { | ||
815 | xfs_trans_cancel(tp, 0); | ||
816 | /* | ||
817 | * No one else is modifying sb_qflags, so this is OK. | ||
818 | * We still hold the quotaofflock. | ||
819 | */ | ||
820 | spin_lock(&mp->m_sb_lock); | ||
821 | mp->m_sb.sb_qflags = oldsbqflag; | ||
822 | spin_unlock(&mp->m_sb_lock); | ||
823 | } | ||
824 | *qoffstartp = qoffi; | 816 | *qoffstartp = qoffi; |
817 | out: | ||
825 | return error; | 818 | return error; |
826 | } | 819 | } |
827 | 820 | ||
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index b238027df987..7542bbeca6a1 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -19,8 +19,6 @@ | |||
19 | #include "xfs_format.h" | 19 | #include "xfs_format.h" |
20 | #include "xfs_log_format.h" | 20 | #include "xfs_log_format.h" |
21 | #include "xfs_trans_resv.h" | 21 | #include "xfs_trans_resv.h" |
22 | #include "xfs_sb.h" | ||
23 | #include "xfs_ag.h" | ||
24 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
25 | #include "xfs_inode.h" | 23 | #include "xfs_inode.h" |
26 | #include "xfs_quota.h" | 24 | #include "xfs_quota.h" |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index e1175ea9b551..f2079b6911cc 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_bit.h" | 24 | #include "xfs_bit.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_bmap.h" | 27 | #include "xfs_bmap.h" |
@@ -36,7 +34,6 @@ | |||
36 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
37 | #include "xfs_buf.h" | 35 | #include "xfs_buf.h" |
38 | #include "xfs_icache.h" | 36 | #include "xfs_icache.h" |
39 | #include "xfs_dinode.h" | ||
40 | #include "xfs_rtalloc.h" | 37 | #include "xfs_rtalloc.h" |
41 | 38 | ||
42 | 39 | ||
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 206b97fd1d8a..19cbda196369 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -21,9 +21,7 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_inum.h" | ||
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_da_format.h" | 26 | #include "xfs_da_format.h" |
29 | #include "xfs_inode.h" | 27 | #include "xfs_inode.h" |
@@ -44,7 +42,6 @@ | |||
44 | #include "xfs_icache.h" | 42 | #include "xfs_icache.h" |
45 | #include "xfs_trace.h" | 43 | #include "xfs_trace.h" |
46 | #include "xfs_icreate_item.h" | 44 | #include "xfs_icreate_item.h" |
47 | #include "xfs_dinode.h" | ||
48 | #include "xfs_filestream.h" | 45 | #include "xfs_filestream.h" |
49 | #include "xfs_quota.h" | 46 | #include "xfs_quota.h" |
50 | #include "xfs_sysfs.h" | 47 | #include "xfs_sysfs.h" |
@@ -796,8 +793,7 @@ xfs_open_devices( | |||
796 | out_free_ddev_targ: | 793 | out_free_ddev_targ: |
797 | xfs_free_buftarg(mp, mp->m_ddev_targp); | 794 | xfs_free_buftarg(mp, mp->m_ddev_targp); |
798 | out_close_rtdev: | 795 | out_close_rtdev: |
799 | if (rtdev) | 796 | xfs_blkdev_put(rtdev); |
800 | xfs_blkdev_put(rtdev); | ||
801 | out_close_logdev: | 797 | out_close_logdev: |
802 | if (logdev && logdev != ddev) | 798 | if (logdev && logdev != ddev) |
803 | xfs_blkdev_put(logdev); | 799 | xfs_blkdev_put(logdev); |
@@ -842,10 +838,15 @@ STATIC int | |||
842 | xfs_init_mount_workqueues( | 838 | xfs_init_mount_workqueues( |
843 | struct xfs_mount *mp) | 839 | struct xfs_mount *mp) |
844 | { | 840 | { |
841 | mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", | ||
842 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname); | ||
843 | if (!mp->m_buf_workqueue) | ||
844 | goto out; | ||
845 | |||
845 | mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", | 846 | mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", |
846 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); | 847 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); |
847 | if (!mp->m_data_workqueue) | 848 | if (!mp->m_data_workqueue) |
848 | goto out; | 849 | goto out_destroy_buf; |
849 | 850 | ||
850 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", | 851 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", |
851 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); | 852 | WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); |
@@ -863,7 +864,7 @@ xfs_init_mount_workqueues( | |||
863 | goto out_destroy_cil; | 864 | goto out_destroy_cil; |
864 | 865 | ||
865 | mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", | 866 | mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", |
866 | WQ_FREEZABLE, 0, mp->m_fsname); | 867 | WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); |
867 | if (!mp->m_log_workqueue) | 868 | if (!mp->m_log_workqueue) |
868 | goto out_destroy_reclaim; | 869 | goto out_destroy_reclaim; |
869 | 870 | ||
@@ -884,6 +885,8 @@ out_destroy_unwritten: | |||
884 | destroy_workqueue(mp->m_unwritten_workqueue); | 885 | destroy_workqueue(mp->m_unwritten_workqueue); |
885 | out_destroy_data_iodone_queue: | 886 | out_destroy_data_iodone_queue: |
886 | destroy_workqueue(mp->m_data_workqueue); | 887 | destroy_workqueue(mp->m_data_workqueue); |
888 | out_destroy_buf: | ||
889 | destroy_workqueue(mp->m_buf_workqueue); | ||
887 | out: | 890 | out: |
888 | return -ENOMEM; | 891 | return -ENOMEM; |
889 | } | 892 | } |
@@ -898,6 +901,7 @@ xfs_destroy_mount_workqueues( | |||
898 | destroy_workqueue(mp->m_cil_workqueue); | 901 | destroy_workqueue(mp->m_cil_workqueue); |
899 | destroy_workqueue(mp->m_data_workqueue); | 902 | destroy_workqueue(mp->m_data_workqueue); |
900 | destroy_workqueue(mp->m_unwritten_workqueue); | 903 | destroy_workqueue(mp->m_unwritten_workqueue); |
904 | destroy_workqueue(mp->m_buf_workqueue); | ||
901 | } | 905 | } |
902 | 906 | ||
903 | /* | 907 | /* |
@@ -1000,7 +1004,6 @@ xfs_fs_evict_inode( | |||
1000 | clear_inode(inode); | 1004 | clear_inode(inode); |
1001 | XFS_STATS_INC(vn_rele); | 1005 | XFS_STATS_INC(vn_rele); |
1002 | XFS_STATS_INC(vn_remove); | 1006 | XFS_STATS_INC(vn_remove); |
1003 | XFS_STATS_DEC(vn_active); | ||
1004 | 1007 | ||
1005 | xfs_inactive(ip); | 1008 | xfs_inactive(ip); |
1006 | } | 1009 | } |
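The new m_buf_workqueue is allocated before the other queues and destroyed after them, and m_log_workqueue is now created WQ_HIGHPRI so the log I/O completions routed to it via b_ioend_wq (see the xfs_log.c hunk earlier) are not starved by ordinary data completions. A minimal sketch of the allocate-and-unwind pattern with just two queues, assuming alloc_workqueue()/destroy_workqueue() from linux/workqueue.h:

#include <linux/workqueue.h>

/* Illustrative two-queue version of the setup/teardown ordering used here. */
static int example_init_wq(struct xfs_mount *mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			WQ_MEM_RECLAIM | WQ_FREEZABLE, 1, mp->m_fsname);
	if (!mp->m_buf_workqueue)
		return -ENOMEM;

	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
			WQ_FREEZABLE | WQ_HIGHPRI, 0, mp->m_fsname);
	if (!mp->m_log_workqueue) {
		destroy_workqueue(mp->m_buf_workqueue);	/* unwind in reverse order */
		return -ENOMEM;
	}
	return 0;
}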
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 02ae62a998e0..25791df6f638 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_bit.h" | 25 | #include "xfs_bit.h" |
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
29 | #include "xfs_da_format.h" | 27 | #include "xfs_da_format.h" |
30 | #include "xfs_da_btree.h" | 28 | #include "xfs_da_btree.h" |
@@ -42,7 +40,6 @@ | |||
42 | #include "xfs_symlink.h" | 40 | #include "xfs_symlink.h" |
43 | #include "xfs_trans.h" | 41 | #include "xfs_trans.h" |
44 | #include "xfs_log.h" | 42 | #include "xfs_log.h" |
45 | #include "xfs_dinode.h" | ||
46 | 43 | ||
47 | /* ----- Kernel only functions below ----- */ | 44 | /* ----- Kernel only functions below ----- */ |
48 | STATIC int | 45 | STATIC int |
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 1e85bcd0e418..13a029806805 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_da_format.h" | 25 | #include "xfs_da_format.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 30e8e3410955..fa3135b9bf04 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -22,8 +22,6 @@ | |||
22 | #include "xfs_format.h" | 22 | #include "xfs_format.h" |
23 | #include "xfs_log_format.h" | 23 | #include "xfs_log_format.h" |
24 | #include "xfs_trans_resv.h" | 24 | #include "xfs_trans_resv.h" |
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_inode.h" | 26 | #include "xfs_inode.h" |
29 | #include "xfs_extent_busy.h" | 27 | #include "xfs_extent_busy.h" |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 859482f53b5a..573aefb5a573 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -18,10 +18,9 @@ | |||
18 | */ | 18 | */ |
19 | #include "xfs.h" | 19 | #include "xfs.h" |
20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
21 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
27 | #include "xfs_trans_priv.h" | 26 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index e2b2216b1635..0a4d4ab6d9a9 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
@@ -229,13 +227,6 @@ xfs_trans_getsb(xfs_trans_t *tp, | |||
229 | return bp; | 227 | return bp; |
230 | } | 228 | } |
231 | 229 | ||
232 | #ifdef DEBUG | ||
233 | xfs_buftarg_t *xfs_error_target; | ||
234 | int xfs_do_error; | ||
235 | int xfs_req_num; | ||
236 | int xfs_error_mod = 33; | ||
237 | #endif | ||
238 | |||
239 | /* | 230 | /* |
240 | * Get and lock the buffer for the caller if it is not already | 231 | * Get and lock the buffer for the caller if it is not already |
241 | * locked within the given transaction. If it has not yet been | 232 | * locked within the given transaction. If it has not yet been |
@@ -257,46 +248,11 @@ xfs_trans_read_buf_map( | |||
257 | struct xfs_buf **bpp, | 248 | struct xfs_buf **bpp, |
258 | const struct xfs_buf_ops *ops) | 249 | const struct xfs_buf_ops *ops) |
259 | { | 250 | { |
260 | xfs_buf_t *bp; | 251 | struct xfs_buf *bp = NULL; |
261 | xfs_buf_log_item_t *bip; | 252 | struct xfs_buf_log_item *bip; |
262 | int error; | 253 | int error; |
263 | 254 | ||
264 | *bpp = NULL; | 255 | *bpp = NULL; |
265 | if (!tp) { | ||
266 | bp = xfs_buf_read_map(target, map, nmaps, flags, ops); | ||
267 | if (!bp) | ||
268 | return (flags & XBF_TRYLOCK) ? | ||
269 | -EAGAIN : -ENOMEM; | ||
270 | |||
271 | if (bp->b_error) { | ||
272 | error = bp->b_error; | ||
273 | xfs_buf_ioerror_alert(bp, __func__); | ||
274 | XFS_BUF_UNDONE(bp); | ||
275 | xfs_buf_stale(bp); | ||
276 | xfs_buf_relse(bp); | ||
277 | |||
278 | /* bad CRC means corrupted metadata */ | ||
279 | if (error == -EFSBADCRC) | ||
280 | error = -EFSCORRUPTED; | ||
281 | return error; | ||
282 | } | ||
283 | #ifdef DEBUG | ||
284 | if (xfs_do_error) { | ||
285 | if (xfs_error_target == target) { | ||
286 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | ||
287 | xfs_buf_relse(bp); | ||
288 | xfs_debug(mp, "Returning error!"); | ||
289 | return -EIO; | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | #endif | ||
294 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
295 | goto shutdown_abort; | ||
296 | *bpp = bp; | ||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | /* | 256 | /* |
301 | * If we find the buffer in the cache with this transaction | 257 | * If we find the buffer in the cache with this transaction |
302 | * pointer in its b_fsprivate2 field, then we know we already | 258 | * pointer in its b_fsprivate2 field, then we know we already |
@@ -305,49 +261,24 @@ xfs_trans_read_buf_map( | |||
305 | * If the buffer is not yet read in, then we read it in, increment | 261 | * If the buffer is not yet read in, then we read it in, increment |
306 | * the lock recursion count, and return it to the caller. | 262 | * the lock recursion count, and return it to the caller. |
307 | */ | 263 | */ |
308 | bp = xfs_trans_buf_item_match(tp, target, map, nmaps); | 264 | if (tp) |
309 | if (bp != NULL) { | 265 | bp = xfs_trans_buf_item_match(tp, target, map, nmaps); |
266 | if (bp) { | ||
310 | ASSERT(xfs_buf_islocked(bp)); | 267 | ASSERT(xfs_buf_islocked(bp)); |
311 | ASSERT(bp->b_transp == tp); | 268 | ASSERT(bp->b_transp == tp); |
312 | ASSERT(bp->b_fspriv != NULL); | 269 | ASSERT(bp->b_fspriv != NULL); |
313 | ASSERT(!bp->b_error); | 270 | ASSERT(!bp->b_error); |
314 | if (!(XFS_BUF_ISDONE(bp))) { | 271 | ASSERT(bp->b_flags & XBF_DONE); |
315 | trace_xfs_trans_read_buf_io(bp, _RET_IP_); | 272 | |
316 | ASSERT(!XFS_BUF_ISASYNC(bp)); | ||
317 | ASSERT(bp->b_iodone == NULL); | ||
318 | XFS_BUF_READ(bp); | ||
319 | bp->b_ops = ops; | ||
320 | |||
321 | error = xfs_buf_submit_wait(bp); | ||
322 | if (error) { | ||
323 | if (!XFS_FORCED_SHUTDOWN(mp)) | ||
324 | xfs_buf_ioerror_alert(bp, __func__); | ||
325 | xfs_buf_relse(bp); | ||
326 | /* | ||
327 | * We can gracefully recover from most read | ||
328 | * errors. Ones we can't are those that happen | ||
329 | * after the transaction's already dirty. | ||
330 | */ | ||
331 | if (tp->t_flags & XFS_TRANS_DIRTY) | ||
332 | xfs_force_shutdown(tp->t_mountp, | ||
333 | SHUTDOWN_META_IO_ERROR); | ||
334 | /* bad CRC means corrupted metadata */ | ||
335 | if (error == -EFSBADCRC) | ||
336 | error = -EFSCORRUPTED; | ||
337 | return error; | ||
338 | } | ||
339 | } | ||
340 | /* | 273 | /* |
341 | * We never locked this buf ourselves, so we shouldn't | 274 | * We never locked this buf ourselves, so we shouldn't |
342 | * brelse it either. Just get out. | 275 | * brelse it either. Just get out. |
343 | */ | 276 | */ |
344 | if (XFS_FORCED_SHUTDOWN(mp)) { | 277 | if (XFS_FORCED_SHUTDOWN(mp)) { |
345 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | 278 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
346 | *bpp = NULL; | ||
347 | return -EIO; | 279 | return -EIO; |
348 | } | 280 | } |
349 | 281 | ||
350 | |||
351 | bip = bp->b_fspriv; | 282 | bip = bp->b_fspriv; |
352 | bip->bli_recur++; | 283 | bip->bli_recur++; |
353 | 284 | ||
@@ -358,17 +289,29 @@ xfs_trans_read_buf_map( | |||
358 | } | 289 | } |
359 | 290 | ||
360 | bp = xfs_buf_read_map(target, map, nmaps, flags, ops); | 291 | bp = xfs_buf_read_map(target, map, nmaps, flags, ops); |
361 | if (bp == NULL) { | 292 | if (!bp) { |
362 | *bpp = NULL; | 293 | if (!(flags & XBF_TRYLOCK)) |
363 | return (flags & XBF_TRYLOCK) ? | 294 | return -ENOMEM; |
364 | 0 : -ENOMEM; | 295 | return tp ? 0 : -EAGAIN; |
365 | } | 296 | } |
297 | |||
298 | /* | ||
299 | * If we've had a read error, then the contents of the buffer are | ||
300 | * invalid and should not be used. To ensure that a followup read tries | ||
301 | * to pull the buffer from disk again, we clear the XBF_DONE flag and | ||
302 | * mark the buffer stale. This ensures that anyone who has a current | ||
303 | * reference to the buffer will interpret its contents correctly and | ||
304 | * future cache lookups will also treat it as an empty, uninitialised | ||
305 | * buffer. | ||
306 | */ | ||
366 | if (bp->b_error) { | 307 | if (bp->b_error) { |
367 | error = bp->b_error; | 308 | error = bp->b_error; |
309 | if (!XFS_FORCED_SHUTDOWN(mp)) | ||
310 | xfs_buf_ioerror_alert(bp, __func__); | ||
311 | bp->b_flags &= ~XBF_DONE; | ||
368 | xfs_buf_stale(bp); | 312 | xfs_buf_stale(bp); |
369 | XFS_BUF_DONE(bp); | 313 | |
370 | xfs_buf_ioerror_alert(bp, __func__); | 314 | if (tp && (tp->t_flags & XFS_TRANS_DIRTY)) |
371 | if (tp->t_flags & XFS_TRANS_DIRTY) | ||
372 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); | 315 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); |
373 | xfs_buf_relse(bp); | 316 | xfs_buf_relse(bp); |
374 | 317 | ||
@@ -377,33 +320,19 @@ xfs_trans_read_buf_map( | |||
377 | error = -EFSCORRUPTED; | 320 | error = -EFSCORRUPTED; |
378 | return error; | 321 | return error; |
379 | } | 322 | } |
380 | #ifdef DEBUG | 323 | |
381 | if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) { | 324 | if (XFS_FORCED_SHUTDOWN(mp)) { |
382 | if (xfs_error_target == target) { | 325 | xfs_buf_relse(bp); |
383 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | 326 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
384 | xfs_force_shutdown(tp->t_mountp, | 327 | return -EIO; |
385 | SHUTDOWN_META_IO_ERROR); | ||
386 | xfs_buf_relse(bp); | ||
387 | xfs_debug(mp, "Returning trans error!"); | ||
388 | return -EIO; | ||
389 | } | ||
390 | } | ||
391 | } | 328 | } |
392 | #endif | ||
393 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
394 | goto shutdown_abort; | ||
395 | 329 | ||
396 | _xfs_trans_bjoin(tp, bp, 1); | 330 | if (tp) |
331 | _xfs_trans_bjoin(tp, bp, 1); | ||
397 | trace_xfs_trans_read_buf(bp->b_fspriv); | 332 | trace_xfs_trans_read_buf(bp->b_fspriv); |
398 | |||
399 | *bpp = bp; | 333 | *bpp = bp; |
400 | return 0; | 334 | return 0; |
401 | 335 | ||
402 | shutdown_abort: | ||
403 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | ||
404 | xfs_buf_relse(bp); | ||
405 | *bpp = NULL; | ||
406 | return -EIO; | ||
407 | } | 336 | } |
408 | 337 | ||
409 | /* | 338 | /* |
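With the transactional and non-transactional cases of xfs_trans_read_buf_map() merged, the read-error contract is the same for both: the buffer loses XBF_DONE and is marked stale so the next lookup rereads it from disk, and the filesystem is shut down only when a dirty transaction can no longer be backed out. A condensed sketch of that error branch, using the calls visible in the hunk above:

/* Condensed illustration of the unified read-error handling. */
if (bp->b_error) {
	int error = bp->b_error;

	if (!XFS_FORCED_SHUTDOWN(mp))
		xfs_buf_ioerror_alert(bp, __func__);
	bp->b_flags &= ~XBF_DONE;	/* force a future re-read from disk */
	xfs_buf_stale(bp);

	if (tp && (tp->t_flags & XFS_TRANS_DIRTY))
		xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
	xfs_buf_relse(bp);

	if (error == -EFSBADCRC)	/* a bad CRC means corrupted metadata */
		error = -EFSCORRUPTED;
	return error;
}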
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 846e061c2e98..76a16df55ef7 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_error.h" | 26 | #include "xfs_error.h" |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 47978ba89dae..284397dd7990 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -18,10 +18,9 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_shared.h" | 20 | #include "xfs_shared.h" |
21 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
27 | #include "xfs_trans_priv.h" | 26 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index cdb4d86520e1..17280cd71934 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include "xfs_format.h" | 21 | #include "xfs_format.h" |
22 | #include "xfs_log_format.h" | 22 | #include "xfs_log_format.h" |
23 | #include "xfs_trans_resv.h" | 23 | #include "xfs_trans_resv.h" |
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | 24 | #include "xfs_mount.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |
28 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 93455b998041..69f6e475de97 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c | |||
@@ -20,8 +20,6 @@ | |||
20 | #include "xfs_format.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_log_format.h" | 21 | #include "xfs_log_format.h" |
22 | #include "xfs_trans_resv.h" | 22 | #include "xfs_trans_resv.h" |
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | 23 | #include "xfs_mount.h" |
26 | #include "xfs_da_format.h" | 24 | #include "xfs_da_format.h" |
27 | #include "xfs_inode.h" | 25 | #include "xfs_inode.h" |