 30 files changed, 696 insertions(+), 202 deletions(-)
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 1c1844957166..728c38c242d6 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -132,3 +132,12 @@ Here we can see the RAID type is raid4, there are 5 devices - all of
 which are 'A'live, and the array is 2/490221568 complete with recovery.
 Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
 are marked 'a'.
+
+
+Version History
+---------------
+1.0.0	Initial version.  Support for RAID 4/5/6
+1.1.0	Added support for RAID 1
+1.2.0	Handle creation of arrays that contain failed devices.
+1.3.0	Added support for RAID 10
+1.3.1	Allow device replacement/rebuild for RAID 10
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 7140b6b26441..78de6805268d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,6 +1,4 @@
-include include/asm-generic/Kbuild.asm
 
-header-y += elf.h
 header-y += ucontext.h
 
 generic-y += atomic.h
diff --git a/arch/openrisc/include/asm/elf.h b/arch/openrisc/include/asm/elf.h
index 225a7ff320ad..f4aa8a542a22 100644
--- a/arch/openrisc/include/asm/elf.h
+++ b/arch/openrisc/include/asm/elf.h
@@ -15,60 +15,12 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
-
 #ifndef __ASM_OPENRISC_ELF_H
 #define __ASM_OPENRISC_ELF_H
 
-/*
- * This files is partially exported to userspace.  This allows us to keep
- * the ELF bits in one place which should assist in keeping the kernel and
- * userspace in sync.
- */
-
-/*
- * ELF register definitions..
- */
-
-/* for struct user_regs_struct definition */
-#include <asm/ptrace.h>
-
-/* The OR1K relocation types... not all relevant for module loader */
-#define R_OR32_NONE	0
-#define R_OR32_32	1
-#define R_OR32_16	2
-#define R_OR32_8	3
-#define R_OR32_CONST	4
-#define R_OR32_CONSTH	5
-#define R_OR32_JUMPTARG	6
-#define R_OR32_VTINHERIT 7
-#define R_OR32_VTENTRY	8
-
-typedef unsigned long elf_greg_t;
-
-/*
- * Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is
- * thus exposed to user-space.
- */
-#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-/* A placeholder; OR32 does not have fp support yes, so no fp regs for now. */
-typedef unsigned long elf_fpregset_t;
-
-/* This should be moved to include/linux/elf.h */
-#define EM_OR32		0x8472
-#define EM_OPENRISC	92	/* OpenRISC 32-bit embedded processor */
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_ARCH	EM_OR32
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2MSB
-
-#ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <uapi/asm/elf.h>
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -113,5 +65,4 @@ extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt);
 #define SET_PERSONALITY(ex) \
 	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
 
-#endif /* __KERNEL__ */
 #endif
diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h
index 8555c0c3d4d7..6ca17264c399 100644
--- a/arch/openrisc/include/asm/ptrace.h
+++ b/arch/openrisc/include/asm/ptrace.h
@@ -15,25 +15,12 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
-
 #ifndef __ASM_OPENRISC_PTRACE_H
 #define __ASM_OPENRISC_PTRACE_H
 
-#ifndef __ASSEMBLY__
-/*
- * This is the layout of the regset returned by the GETREGSET ptrace call
- */
-struct user_regs_struct {
-	/* GPR R0-R31... */
-	unsigned long gpr[32];
-	unsigned long pc;
-	unsigned long sr;
-};
-#endif
-
-#ifdef __KERNEL__
 
 #include <asm/spr_defs.h>
+#include <uapi/asm/ptrace.h>
 
 /*
  * Make kernel PTrace/register structures opaque to userspace... userspace can
@@ -134,6 +121,4 @@ static inline long regs_return_value(struct pt_regs *regs)
 #define PT_ORIG_GPR11	132
 #define PT_SYSCALLNO	136
 
-#endif /* __KERNEL__ */
-
 #endif /* __ASM_OPENRISC_PTRACE_H */
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index baebb3da1d44..80761eb82b5f 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,3 +1,10 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += byteorder.h
+header-y += elf.h
+header-y += kvm_para.h
+header-y += param.h
+header-y += ptrace.h
+header-y += sigcontext.h
+header-y += unistd.h
diff --git a/arch/openrisc/include/asm/byteorder.h b/arch/openrisc/include/uapi/asm/byteorder.h
index 60d14f7e14e2..60d14f7e14e2 100644
--- a/arch/openrisc/include/asm/byteorder.h
+++ b/arch/openrisc/include/uapi/asm/byteorder.h
diff --git a/arch/openrisc/include/uapi/asm/elf.h b/arch/openrisc/include/uapi/asm/elf.h
new file mode 100644
index 000000000000..f02ea5830420
--- /dev/null
+++ b/arch/openrisc/include/uapi/asm/elf.h
@@ -0,0 +1,69 @@
+/*
+ * OpenRISC Linux
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * OpenRISC implementation:
+ * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
+ * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
+ * et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UAPI__ASM_OPENRISC_ELF_H
+#define _UAPI__ASM_OPENRISC_ELF_H
+
+/*
+ * This files is partially exported to userspace.  This allows us to keep
+ * the ELF bits in one place which should assist in keeping the kernel and
+ * userspace in sync.
+ */
+
+/*
+ * ELF register definitions..
+ */
+
+/* for struct user_regs_struct definition */
+#include <asm/ptrace.h>
+
+/* The OR1K relocation types... not all relevant for module loader */
+#define R_OR32_NONE	0
+#define R_OR32_32	1
+#define R_OR32_16	2
+#define R_OR32_8	3
+#define R_OR32_CONST	4
+#define R_OR32_CONSTH	5
+#define R_OR32_JUMPTARG	6
+#define R_OR32_VTINHERIT 7
+#define R_OR32_VTENTRY	8
+
+typedef unsigned long elf_greg_t;
+
+/*
+ * Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is
+ * thus exposed to user-space.
+ */
+#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+/* A placeholder; OR32 does not have fp support yes, so no fp regs for now. */
+typedef unsigned long elf_fpregset_t;
+
+/* This should be moved to include/linux/elf.h */
+#define EM_OR32		0x8472
+#define EM_OPENRISC	92	/* OpenRISC 32-bit embedded processor */
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_ARCH	EM_OR32
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2MSB
+
+#endif /* _UAPI__ASM_OPENRISC_ELF_H */
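
For reference, the ELF_NGREG arithmetic above follows directly from the regset layout exported in uapi/asm/ptrace.h below: 32 GPRs plus pc and sr, each one unsigned long. A standalone sketch checking the arithmetic (the struct is copied here for illustration rather than included from the header):

    /* Sketch: mirrors the uapi layout to show ELF_NGREG works out to 34
     * on a 32-bit target (32 GPRs + pc + sr). Standalone copy for
     * illustration only; real code includes <asm/ptrace.h>. */
    #include <stdio.h>

    struct user_regs_struct {
            unsigned long gpr[32];
            unsigned long pc;
            unsigned long sr;
    };

    typedef unsigned long elf_greg_t;
    #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))

    int main(void)
    {
            printf("ELF_NGREG = %zu\n", ELF_NGREG);  /* prints 34 */
            return 0;
    }
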
diff --git a/arch/openrisc/include/asm/kvm_para.h b/arch/openrisc/include/uapi/asm/kvm_para.h
index 14fab8f0b957..14fab8f0b957 100644
--- a/arch/openrisc/include/asm/kvm_para.h
+++ b/arch/openrisc/include/uapi/asm/kvm_para.h
diff --git a/arch/openrisc/include/asm/param.h b/arch/openrisc/include/uapi/asm/param.h
index c39a336610e2..c39a336610e2 100644
--- a/arch/openrisc/include/asm/param.h
+++ b/arch/openrisc/include/uapi/asm/param.h
diff --git a/arch/openrisc/include/uapi/asm/ptrace.h b/arch/openrisc/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000000..9760bd17fbcc
--- /dev/null
+++ b/arch/openrisc/include/uapi/asm/ptrace.h
@@ -0,0 +1,35 @@
+/*
+ * OpenRISC Linux
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * OpenRISC implementation:
+ * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
+ * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
+ * et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UAPI__ASM_OPENRISC_PTRACE_H
+#define _UAPI__ASM_OPENRISC_PTRACE_H
+
+#ifndef __ASSEMBLY__
+/*
+ * This is the layout of the regset returned by the GETREGSET ptrace call
+ */
+struct user_regs_struct {
+	/* GPR R0-R31... */
+	unsigned long gpr[32];
+	unsigned long pc;
+	unsigned long sr;
+};
+#endif
+
+
+#endif /* _UAPI__ASM_OPENRISC_PTRACE_H */
diff --git a/arch/openrisc/include/asm/sigcontext.h b/arch/openrisc/include/uapi/asm/sigcontext.h
index b79c2b19afbe..b79c2b19afbe 100644
--- a/arch/openrisc/include/asm/sigcontext.h
+++ b/arch/openrisc/include/uapi/asm/sigcontext.h
diff --git a/arch/openrisc/include/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 437bdbb61b14..437bdbb61b14 100644
--- a/arch/openrisc/include/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
diff --git a/crypto/xor.c b/crypto/xor.c
index 65c7b416b4a3..35d6b3adf230 100644
--- a/crypto/xor.c
+++ b/crypto/xor.c
@@ -56,11 +56,11 @@ xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
 EXPORT_SYMBOL(xor_blocks);
 
 /* Set of all registered templates.  */
-static struct xor_block_template *template_list;
+static struct xor_block_template *__initdata template_list;
 
 #define BENCH_SIZE (PAGE_SIZE)
 
-static void
+static void __init
 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 {
 	int speed;
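
The hunk above applies the standard __init/__initdata pattern: the XOR template list and the speed-benchmark routine are only used while calibrating at boot, so tagging them lets the kernel reclaim that memory once the init sections are freed. A minimal kernel-style sketch of the pattern (a fragment under stated assumptions, not a standalone program; the names are illustrative):

    #include <linux/init.h>

    /* Data and code reachable only from boot-time initialization can be
     * placed in the init sections, which the kernel discards after boot. */
    static int __initdata boot_only_result;     /* freed with .init.data */

    static int __init calibrate_something(void)
    {
            boot_only_result = 42;              /* runs exactly once, at init */
            return 0;
    }
    device_initcall(calibrate_something);
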
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 94e7f6ba2e11..7155945f8eb8 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -163,20 +163,17 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
 	 * As devices are only added or removed when raid_disk is < 0 and
 	 * nr_pending is 0 and In_sync is clear, the entries we return will
 	 * still be in the same position on the list when we re-enter
-	 * list_for_each_continue_rcu.
+	 * list_for_each_entry_continue_rcu.
 	 */
-	struct list_head *pos;
 	rcu_read_lock();
 	if (rdev == NULL)
 		/* start at the beginning */
-		pos = &mddev->disks;
+		rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
 	else {
 		/* release the previous rdev and start from there. */
 		rdev_dec_pending(rdev, mddev);
-		pos = &rdev->same_set;
 	}
-	list_for_each_continue_rcu(pos, &mddev->disks) {
-		rdev = list_entry(pos, struct md_rdev, same_set);
+	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			/* this is a usable devices */
@@ -473,14 +470,10 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
 {
 	bitmap_super_t *sb;
 	unsigned long chunksize, daemon_sleep, write_behind;
-	int err = -EINVAL;
 
 	bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
-	if (IS_ERR(bitmap->storage.sb_page)) {
-		err = PTR_ERR(bitmap->storage.sb_page);
-		bitmap->storage.sb_page = NULL;
-		return err;
-	}
+	if (bitmap->storage.sb_page == NULL)
+		return -ENOMEM;
 	bitmap->storage.sb_page->index = 0;
 
 	sb = kmap_atomic(bitmap->storage.sb_page);
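
The second hunk fixes an error-handling bug: alloc_page() reports failure by returning NULL, never an ERR_PTR-encoded errno, so the old IS_ERR()/PTR_ERR() branch could never fire and a real allocation failure went undetected. A userspace-shaped sketch of the distinction, with malloc standing in for alloc_page():

    #include <stdio.h>
    #include <stdlib.h>

    /* An allocator that signals failure with NULL must be checked with
     * == NULL; IS_ERR()-style checks only apply to APIs that encode an
     * errno in the pointer value itself. */
    static void *alloc_like_alloc_page(void)
    {
            return malloc(4096);    /* NULL on failure, like alloc_page() */
    }

    int main(void)
    {
            void *page = alloc_like_alloc_page();
            if (page == NULL)       /* the correct check */
                    return 1;       /* -ENOMEM in the kernel version */
            free(page);
            return 0;
    }
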
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 982e3e390c45..45d94a7e7f6d 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -338,6 +338,84 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 }
 
 /*
+ * validate_rebuild_devices
+ * @rs
+ *
+ * Determine if the devices specified for rebuild can result in a valid
+ * usable array that is capable of rebuilding the given devices.
+ *
+ * Returns: 0 on success, -EINVAL on failure.
+ */
+static int validate_rebuild_devices(struct raid_set *rs)
+{
+	unsigned i, rebuild_cnt = 0;
+	unsigned rebuilds_per_group, copies, d;
+
+	if (!(rs->print_flags & DMPF_REBUILD))
+		return 0;
+
+	for (i = 0; i < rs->md.raid_disks; i++)
+		if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
+			rebuild_cnt++;
+
+	switch (rs->raid_type->level) {
+	case 1:
+		if (rebuild_cnt >= rs->md.raid_disks)
+			goto too_many;
+		break;
+	case 4:
+	case 5:
+	case 6:
+		if (rebuild_cnt > rs->raid_type->parity_devs)
+			goto too_many;
+		break;
+	case 10:
+		copies = raid10_md_layout_to_copies(rs->md.layout);
+		if (rebuild_cnt < copies)
+			break;
+
+		/*
+		 * It is possible to have a higher rebuild count for RAID10,
+		 * as long as the failed devices occur in different mirror
+		 * groups (i.e. different stripes).
+		 *
+		 * Right now, we only allow for "near" copies.  When other
+		 * formats are added, we will have to check those too.
+		 *
+		 * When checking "near" format, make sure no adjacent devices
+		 * have failed beyond what can be handled.  In addition to the
+		 * simple case where the number of devices is a multiple of the
+		 * number of copies, we must also handle cases where the number
+		 * of devices is not a multiple of the number of copies.
+		 * E.g.  dev1 dev2 dev3 dev4 dev5
+		 *        A    A    B    B    C
+		 *        C    D    D    E    E
+		 */
+		rebuilds_per_group = 0;
+		for (i = 0; i < rs->md.raid_disks * copies; i++) {
+			d = i % rs->md.raid_disks;
+			if (!test_bit(In_sync, &rs->dev[d].rdev.flags) &&
+			    (++rebuilds_per_group >= copies))
+				goto too_many;
+			if (!((i + 1) % copies))
+				rebuilds_per_group = 0;
+		}
+		break;
+	default:
+		DMERR("The rebuild parameter is not supported for %s",
+		      rs->raid_type->name);
+		rs->ti->error = "Rebuild not supported for this RAID type";
+		return -EINVAL;
+	}
+
+	return 0;
+
+too_many:
+	rs->ti->error = "Too many rebuild devices specified";
+	return -EINVAL;
+}
+
+/*
  * Possible arguments are...
  *	<chunk_size> [optional_args]
  *
@@ -365,7 +443,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 {
 	char *raid10_format = "near";
 	unsigned raid10_copies = 2;
-	unsigned i, rebuild_cnt = 0;
+	unsigned i;
 	unsigned long value, region_size = 0;
 	sector_t sectors_per_dev = rs->ti->len;
 	sector_t max_io_len;
@@ -461,31 +539,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 
 		/* Parameters that take a numeric value are checked here */
 		if (!strcasecmp(key, "rebuild")) {
-			rebuild_cnt++;
-
-			switch (rs->raid_type->level) {
-			case 1:
-				if (rebuild_cnt >= rs->md.raid_disks) {
-					rs->ti->error = "Too many rebuild devices specified";
-					return -EINVAL;
-				}
-				break;
-			case 4:
-			case 5:
-			case 6:
-				if (rebuild_cnt > rs->raid_type->parity_devs) {
-					rs->ti->error = "Too many rebuild devices specified for given RAID type";
-					return -EINVAL;
-				}
-				break;
-			case 10:
-			default:
-				DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
-				rs->ti->error = "Rebuild not supported for this RAID type";
-				return -EINVAL;
-			}
-
-			if (value > rs->md.raid_disks) {
+			if (value >= rs->md.raid_disks) {
 				rs->ti->error = "Invalid rebuild index given";
 				return -EINVAL;
 			}
@@ -608,6 +662,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 	}
 	rs->md.dev_sectors = sectors_per_dev;
 
+	if (validate_rebuild_devices(rs))
+		return -EINVAL;
+
 	/* Assume there are no metadata devices until the drives are parsed */
 	rs->md.persistent = 0;
 	rs->md.external = 1;
@@ -960,6 +1017,19 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 
 	freshest = NULL;
 	rdev_for_each_safe(rdev, tmp, mddev) {
+		/*
+		 * Skipping super_load due to DMPF_SYNC will cause
+		 * the array to undergo initialization again as
+		 * though it were new.  This is the intended effect
+		 * of the "sync" directive.
+		 *
+		 * When reshaping capability is added, we must ensure
+		 * that the "sync" directive is disallowed during the
+		 * reshape.
+		 */
+		if (rs->print_flags & DMPF_SYNC)
+			continue;
+
 		if (!rdev->meta_bdev)
 			continue;
 
@@ -1360,7 +1430,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 3, 0},
+	.version = {1, 3, 1},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
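
The "near"-layout check in validate_rebuild_devices() above walks raid_disks * copies slots and resets its per-group counter at every group boundary, which is what makes the non-multiple case from the comment (A A B B C / C D D E E across five devices) come out right. A hedged userspace re-implementation of that loop, with two scenarios as a usage example:

    #include <stdbool.h>
    #include <stdio.h>

    /* in_sync[d] is true when device d holds valid data; returns true
     * when every mirror group still has at least one in-sync member.
     * Mirrors the loop in validate_rebuild_devices() above. */
    static bool near_layout_rebuildable(const bool *in_sync, unsigned disks,
                                        unsigned copies)
    {
            unsigned rebuilds_per_group = 0, i;

            for (i = 0; i < disks * copies; i++) {
                    unsigned d = i % disks;

                    if (!in_sync[d] && ++rebuilds_per_group >= copies)
                            return false;   /* a whole group has failed */
                    if (!((i + 1) % copies))
                            rebuilds_per_group = 0; /* group boundary */
            }
            return true;
    }

    int main(void)
    {
            /* 5 disks, 2 copies: failing dev1 and dev3 leaves each copy
             * group with a survivor... */
            bool ok1[5] = { false, true, false, true, true };
            /* ...but failing dev1 and dev2 wipes out one whole group. */
            bool ok2[5] = { false, false, true, true, true };

            printf("%d %d\n", near_layout_rebuildable(ok1, 5, 2),
                   near_layout_rebuildable(ok2, 5, 2));   /* prints: 1 0 */
            return 0;
    }
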
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index fa211d80fc0a..21014836bdbf 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -138,6 +138,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 	struct linear_conf *conf;
 	struct md_rdev *rdev;
 	int i, cnt;
+	bool discard_supported = false;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
 			GFP_KERNEL);
@@ -171,6 +172,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 		conf->array_sectors += rdev->sectors;
 		cnt++;
 
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 	if (cnt != raid_disks) {
 		printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
@@ -178,6 +181,11 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 		goto out;
 	}
 
+	if (!discard_supported)
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/*
	 * Here we calculate the device offsets.
 	 */
@@ -244,7 +252,9 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
 	if (!newconf)
 		return -ENOMEM;
 
-	oldconf = rcu_dereference(mddev->private);
+	oldconf = rcu_dereference_protected(mddev->private,
+					    lockdep_is_held(
+						    &mddev->reconfig_mutex));
 	mddev->raid_disks++;
 	rcu_assign_pointer(mddev->private, newconf);
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
@@ -256,7 +266,10 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
 
 static int linear_stop (struct mddev *mddev)
 {
-	struct linear_conf *conf = mddev->private;
+	struct linear_conf *conf =
+		rcu_dereference_protected(mddev->private,
+					  lockdep_is_held(
+						  &mddev->reconfig_mutex));
 
 	/*
 	 * We do not require rcu protection here since
@@ -326,6 +339,14 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
 	bio->bi_sector = bio->bi_sector - start_sector
 		+ tmp_dev->rdev->data_offset;
 	rcu_read_unlock();
+
+	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
+		/* Just ignore it */
+		bio_endio(bio, 0);
+		return;
+	}
+
 	generic_make_request(bio);
 }
 
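
The rcu_dereference_protected() conversions above encode a locking fact: linear_add() and linear_stop() run with mddev->reconfig_mutex held, so the RCU-published conf pointer cannot change under them and no rcu_read_lock() is needed; lockdep_is_held() lets lockdep verify the claim. A kernel-style sketch of the pattern (a fragment under stated assumptions; the struct and field names are illustrative):

    #include <linux/mutex.h>
    #include <linux/rcupdate.h>

    struct conf;
    struct dev {
            struct mutex cfg_lock;
            struct conf __rcu *private;
    };

    /* Update-side read of an RCU-published pointer: the caller holds
     * d->cfg_lock, which is what pins the pointer, and lockdep checks
     * that the stated condition actually holds. */
    static struct conf *get_conf_locked(struct dev *d)
    {
            return rcu_dereference_protected(d->private,
                                             lockdep_is_held(&d->cfg_lock));
    }
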
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 95c88012a3b9..9ab768acfb62 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -674,7 +674,18 @@ static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
 	return NULL;
 }
 
-static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
+static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
+{
+	struct md_rdev *rdev;
+
+	rdev_for_each_rcu(rdev, mddev)
+		if (rdev->desc_nr == nr)
+			return rdev;
+
+	return NULL;
+}
+
+static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
 {
 	struct md_rdev *rdev;
 
@@ -685,6 +696,17 @@ static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
 	return NULL;
 }
 
+static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
+{
+	struct md_rdev *rdev;
+
+	rdev_for_each_rcu(rdev, mddev)
+		if (rdev->bdev->bd_dev == dev)
+			return rdev;
+
+	return NULL;
+}
+
 static struct md_personality *find_pers(int level, char *clevel)
 {
 	struct md_personality *pers;
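
These _rcu lookup helpers exist so callers can resolve an rdev under rcu_read_lock() instead of reconfig_mutex; the result is only valid inside the read-side critical section, so all use of the rdev must happen before rcu_read_unlock() (or a reference must be taken first). A kernel-style sketch of a caller, mirroring set_disk_faulty() as reworked later in this patch (poke_rdev is a hypothetical name):

    /* Sketch: the rdev pointer must not escape the RCU critical
     * section unless a reference was taken while inside it. */
    static int poke_rdev(struct mddev *mddev, dev_t dev)
    {
            struct md_rdev *rdev;
            int err = 0;

            rcu_read_lock();
            rdev = find_rdev_rcu(mddev, dev);
            if (!rdev)
                    err = -ENODEV;
            else
                    md_error(mddev, rdev); /* used while still under RCU */
            rcu_read_unlock();
            return err;
    }
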
@@ -2022,8 +2044,14 @@ EXPORT_SYMBOL(md_integrity_register);
 /* Disable data integrity if non-capable/non-matching disk is being added */
 void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
 {
-	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
-	struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
+	struct blk_integrity *bi_rdev;
+	struct blk_integrity *bi_mddev;
+
+	if (!mddev->gendisk)
+		return;
+
+	bi_rdev = bdev_get_integrity(rdev->bdev);
+	bi_mddev = blk_get_integrity(mddev->gendisk);
 
 	if (!bi_mddev) /* nothing to do */
 		return;
@@ -3754,6 +3782,8 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len)
 		return -EINVAL;
 
 	mddev->recovery_cp = n;
+	if (mddev->pers)
+		set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 	return len;
 }
 static struct md_sysfs_entry md_resync_start =
@@ -4231,6 +4261,13 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	}
+	if (mddev->ro == 2) {
+		/* A write to sync_action is enough to justify
+		 * canceling read-auto mode
+		 */
+		mddev->ro = 0;
+		md_wakeup_thread(mddev->sync_thread);
+	}
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
@@ -4241,7 +4278,8 @@ static ssize_t
 mismatch_cnt_show(struct mddev *mddev, char *page)
 {
 	return sprintf(page, "%llu\n",
-		       (unsigned long long) mddev->resync_mismatches);
+		       (unsigned long long)
+		       atomic64_read(&mddev->resync_mismatches));
 }
 
 static struct md_sysfs_entry md_scan_mode =
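
resync_mismatches becomes an atomic64_t (see the md.h hunk below) because RAID personalities add to it from their sync threads while this sysfs handler reads it locklessly; atomic64_add/atomic64_read/atomic64_set replace the plain, tear-prone accesses. A userspace C11 analogue of the same idea:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Writers add, the reader loads; no lock is needed for a
     * consistent 64-bit value. */
    static _Atomic long long resync_mismatches;

    static void found_mismatch(long long sectors)
    {
            atomic_fetch_add(&resync_mismatches, sectors); /* atomic64_add() */
    }

    int main(void)
    {
            found_mismatch(8);
            printf("%lld\n", atomic_load(&resync_mismatches)); /* atomic64_read() */
            return 0;
    }
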
@@ -4362,6 +4400,10 @@ sync_completed_show(struct mddev *mddev, char *page)
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return sprintf(page, "none\n");
 
+	if (mddev->curr_resync == 1 ||
+	    mddev->curr_resync == 2)
+		return sprintf(page, "delayed\n");
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
 	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->resync_max_sectors;
@@ -5207,7 +5249,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->new_layout = 0;
 	mddev->new_chunk_sectors = 0;
 	mddev->curr_resync = 0;
-	mddev->resync_mismatches = 0;
+	atomic64_set(&mddev->resync_mismatches, 0);
 	mddev->suspend_lo = mddev->suspend_hi = 0;
 	mddev->sync_speed_min = mddev->sync_speed_max = 0;
 	mddev->recovery = 0;
@@ -5509,8 +5551,9 @@ static int get_array_info(struct mddev * mddev, void __user * arg)
 	int nr,working,insync,failed,spare;
 	struct md_rdev *rdev;
 
-	nr=working=insync=failed=spare=0;
-	rdev_for_each(rdev, mddev) {
+	nr = working = insync = failed = spare = 0;
+	rcu_read_lock();
+	rdev_for_each_rcu(rdev, mddev) {
 		nr++;
 		if (test_bit(Faulty, &rdev->flags))
 			failed++;
@@ -5522,6 +5565,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg)
 			spare++;
 		}
 	}
+	rcu_read_unlock();
 
 	info.major_version = mddev->major_version;
 	info.minor_version = mddev->minor_version;
@@ -5605,7 +5649,8 @@ static int get_disk_info(struct mddev * mddev, void __user * arg)
 	if (copy_from_user(&info, arg, sizeof(info)))
 		return -EFAULT;
 
-	rdev = find_rdev_nr(mddev, info.number);
+	rcu_read_lock();
+	rdev = find_rdev_nr_rcu(mddev, info.number);
 	if (rdev) {
 		info.major = MAJOR(rdev->bdev->bd_dev);
 		info.minor = MINOR(rdev->bdev->bd_dev);
@@ -5624,6 +5669,7 @@ static int get_disk_info(struct mddev * mddev, void __user * arg)
 		info.raid_disk = -1;
 		info.state = (1<<MD_DISK_REMOVED);
 	}
+	rcu_read_unlock();
 
 	if (copy_to_user(arg, &info, sizeof(info)))
 		return -EFAULT;
@@ -6232,18 +6278,22 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 static int set_disk_faulty(struct mddev *mddev, dev_t dev)
 {
 	struct md_rdev *rdev;
+	int err = 0;
 
 	if (mddev->pers == NULL)
 		return -ENODEV;
 
-	rdev = find_rdev(mddev, dev);
+	rcu_read_lock();
+	rdev = find_rdev_rcu(mddev, dev);
 	if (!rdev)
-		return -ENODEV;
-
-	md_error(mddev, rdev);
-	if (!test_bit(Faulty, &rdev->flags))
-		return -EBUSY;
-	return 0;
+		err = -ENODEV;
+	else {
+		md_error(mddev, rdev);
+		if (!test_bit(Faulty, &rdev->flags))
+			err = -EBUSY;
+	}
+	rcu_read_unlock();
+	return err;
 }
 
 /*
@@ -6315,6 +6365,27 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		goto abort;
 	}
 
+	/* Some actions do not requires the mutex */
+	switch (cmd) {
+	case GET_ARRAY_INFO:
+		if (!mddev->raid_disks && !mddev->external)
+			err = -ENODEV;
+		else
+			err = get_array_info(mddev, argp);
+		goto abort;
+
+	case GET_DISK_INFO:
+		if (!mddev->raid_disks && !mddev->external)
+			err = -ENODEV;
+		else
+			err = get_disk_info(mddev, argp);
+		goto abort;
+
+	case SET_DISK_FAULTY:
+		err = set_disk_faulty(mddev, new_decode_dev(arg));
+		goto abort;
+	}
+
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO
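
GET_ARRAY_INFO, GET_DISK_INFO and SET_DISK_FAULTY are exactly the ioctls that monitoring tools issue repeatedly, so answering them before taking (and possibly blocking on) mddev_lock() keeps status queries responsive during reconfiguration; the RCU lookups above are what make that safe. A hedged sketch of such a userspace caller (the device path is an example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/raid/md_u.h>    /* mdu_array_info_t, GET_ARRAY_INFO */

    /* With this patch, GET_ARRAY_INFO completes without waiting on
     * reconfig_mutex, so a stuck reconfiguration no longer blocks
     * status queries like this one. */
    int main(void)
    {
            mdu_array_info_t info;
            int fd = open("/dev/md0", O_RDONLY);

            if (fd < 0 || ioctl(fd, GET_ARRAY_INFO, &info) < 0) {
                    perror("GET_ARRAY_INFO");
                    return 1;
            }
            printf("raid level %d, %d disks\n", info.level, info.raid_disks);
            return 0;
    }
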
@@ -6387,18 +6458,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 	 */
 	switch (cmd)
 	{
-		case GET_ARRAY_INFO:
-			err = get_array_info(mddev, argp);
-			goto done_unlock;
-
 		case GET_BITMAP_FILE:
 			err = get_bitmap_file(mddev, argp);
 			goto done_unlock;
 
-		case GET_DISK_INFO:
-			err = get_disk_info(mddev, argp);
-			goto done_unlock;
-
 		case RESTART_ARRAY_RW:
 			err = restart_array(mddev);
 			goto done_unlock;
@@ -6480,10 +6543,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 			err = hot_add_disk(mddev, new_decode_dev(arg));
 			goto done_unlock;
 
-		case SET_DISK_FAULTY:
-			err = set_disk_faulty(mddev, new_decode_dev(arg));
-			goto done_unlock;
-
 		case RUN_ARRAY:
 			err = do_md_run(mddev);
 			goto done_unlock;
@@ -6641,7 +6700,7 @@ static int md_thread(void * arg)
 
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 		if (!kthread_should_stop())
-			thread->run(thread->mddev);
+			thread->run(thread);
 	}
 
 	return 0;
@@ -6656,8 +6715,8 @@ void md_wakeup_thread(struct md_thread *thread)
 	}
 }
 
-struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev *mddev,
-				 const char *name)
+struct md_thread *md_register_thread(void (*run) (struct md_thread *),
+		struct mddev *mddev, const char *name)
 {
 	struct md_thread *thread;
 
@@ -6752,7 +6811,11 @@ static void status_resync(struct seq_file *seq, struct mddev * mddev)
 	int scale;
 	unsigned int per_milli;
 
-	resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
+	if (mddev->curr_resync <= 3)
+		resync = 0;
+	else
+		resync = mddev->curr_resync
+			- atomic_read(&mddev->recovery_active);
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
 	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
@@ -6978,7 +7041,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			if (mddev->curr_resync > 2) {
 				status_resync(seq, mddev);
 				seq_printf(seq, "\n ");
-			} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
+			} else if (mddev->curr_resync >= 1)
 				seq_printf(seq, "\tresync=DELAYED\n ");
 			else if (mddev->recovery_cp < MaxSector)
 				seq_printf(seq, "\tresync=PENDING\n ");
@@ -7206,8 +7269,9 @@ EXPORT_SYMBOL_GPL(md_allow_write);
 
 #define SYNC_MARKS	10
 #define SYNC_MARK_STEP	(3*HZ)
-void md_do_sync(struct mddev *mddev)
+void md_do_sync(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct mddev *mddev2;
 	unsigned int currspeed = 0,
 		 window;
@@ -7311,7 +7375,7 @@ void md_do_sync(struct mddev *mddev)
 		 * which defaults to physical size, but can be virtual size
 		 */
 		max_sectors = mddev->resync_max_sectors;
-		mddev->resync_mismatches = 0;
+		atomic64_set(&mddev->resync_mismatches, 0);
 		/* we don't use the checkpoint if there's a bitmap */
 		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 			j = mddev->resync_min;
@@ -7367,8 +7431,11 @@ void md_do_sync(struct mddev *mddev)
 		       "md: resuming %s of %s from checkpoint.\n",
 		       desc, mdname(mddev));
 		mddev->curr_resync = j;
-	}
+	} else
+		mddev->curr_resync = 3; /* no longer delayed */
 	mddev->curr_resync_completed = j;
+	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+	md_new_event(mddev);
 
 	blk_start_plug(&plug);
 	while (j < max_sectors) {
@@ -7421,7 +7488,8 @@ void md_do_sync(struct mddev *mddev)
 			break;
 
 		j += sectors;
-		if (j>1) mddev->curr_resync = j;
+		if (j > 2)
+			mddev->curr_resync = j;
 		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
 			/* this is the earliest that rebuild will be
@@ -7543,8 +7611,6 @@ static int remove_and_add_spares(struct mddev *mddev)
 	int spares = 0;
 	int removed = 0;
 
-	mddev->curr_resync_completed = 0;
-
 	rdev_for_each(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
@@ -7739,6 +7805,7 @@ void md_check_recovery(struct mddev *mddev)
 		/* Set RUNNING before clearing NEEDED to avoid
 		 * any transients in the value of "sync_action".
 		 */
+		mddev->curr_resync_completed = 0;
 		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 		/* Clear some bits that don't mean anything, but
 		 * might be left set
@@ -7752,7 +7819,7 @@ void md_check_recovery(struct mddev *mddev)
 		/* no recovery is running.
 		 * remove any failed drives, then
 		 * add spares if possible.
-		 * Spare are also removed and re-added, to allow
+		 * Spares are also removed and re-added, to allow
 		 * the personality to fail the re-add.
 		 */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f385b038589d..af443ab868db 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -282,7 +282,7 @@ struct mddev {
 
 	sector_t		resync_max_sectors; /* may be set by personality */
 
-	sector_t		resync_mismatches; /* count of sectors where
+	atomic64_t		resync_mismatches; /* count of sectors where
 						    * parity/replica mismatch found
 						    */
 
@@ -540,12 +540,13 @@ static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
 
 struct md_thread {
-	void			(*run) (struct mddev *mddev);
+	void			(*run) (struct md_thread *thread);
 	struct mddev		*mddev;
 	wait_queue_head_t	wqueue;
 	unsigned long		flags;
 	struct task_struct	*tsk;
 	unsigned long		timeout;
+	void			*private;
 };
 
 #define THREAD_WAKEUP  0
@@ -584,7 +585,7 @@ static inline void safe_put_page(struct page *p)
 extern int register_md_personality(struct md_personality *p);
 extern int unregister_md_personality(struct md_personality *p);
 extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
 	struct mddev *mddev,
 	const char *name);
 extern void md_unregister_thread(struct md_thread **threadp);
@@ -603,7 +604,7 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 extern void md_super_wait(struct mddev *mddev);
 extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 			struct page *page, int rw, bool metadata_op);
-extern void md_do_sync(struct mddev *mddev);
+extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(struct mddev *mddev);
 extern int md_allow_write(struct mddev *mddev);
 extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
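
Passing the md_thread itself to the run callback (together with the new private field) lets a personality recover both the mddev and its own per-thread state from the single argument, instead of forcing everything through mddev. A kernel-style sketch of a callback under the new signature (my_state and my_raid_daemon are hypothetical names):

    /* Sketch: the callback derives everything it needs from the
     * md_thread it is handed; ->private is new in this patch. */
    struct my_state { int counter; };

    static void my_raid_daemon(struct md_thread *thread)
    {
            struct mddev *mddev = thread->mddev;
            struct my_state *st = thread->private;

            st->counter++;
            md_check_recovery(mddev);
    }
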
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 61a1833ebaf3..1642eae75a33 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -335,8 +335,9 @@ abort:
  *	3.	Performs writes following reads for array syncronising.
  */
 
-static void multipathd (struct mddev *mddev)
+static void multipathd(struct md_thread *thread)
 {
+	struct mddev *mddev = thread->mddev;
 	struct multipath_bh *mp_bh;
 	struct bio *bio;
 	unsigned long flags;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a9e4fa95dfaa..24b359717a7e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -88,6 +88,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	char b[BDEVNAME_SIZE];
 	char b2[BDEVNAME_SIZE];
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+	bool discard_supported = false;
 
 	if (!conf)
 		return -ENOMEM;
@@ -195,6 +196,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
 		cnt++;
+
+		if (blk_queue_discard(bdev_get_queue(rdev1->bdev)))
+			discard_supported = true;
 	}
 	if (cnt != mddev->raid_disks) {
 		printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
@@ -272,6 +276,11 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	blk_queue_io_opt(mddev->queue,
 			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
 
+	if (!discard_supported)
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
 	*private_conf = conf;
 
@@ -423,6 +432,7 @@ static int raid0_run(struct mddev *mddev)
 		return -EINVAL;
 	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 	blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+	blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
@@ -510,7 +520,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 		sector_t sector = bio->bi_sector;
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -536,6 +546,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	bio->bi_sector = sector_offset + zone->dev_start +
 		tmp_dev->data_offset;
 
+	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
+		/* Just ignore it */
+		bio_endio(bio, 0);
+		return;
+	}
+
 	generic_make_request(bio);
 	return;
 
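
raid0 and linear (above) adopt the same discard policy: mark the array's queue as discard-capable when at least one member device supports it, and quietly complete, rather than fail, any discard that still lands on a member without support (bio_endio(bio, 0)). A userspace-shaped sketch of the advertising half of that policy:

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the discard_supported accumulation in create_strip_zones():
     * one capable member is enough to advertise discard; incapable
     * members are handled later by no-op completion in the bio path. */
    static bool any_member_supports_discard(const bool *member, int n)
    {
            bool supported = false;

            for (int i = 0; i < n; i++)
                    if (member[i])
                            supported = true;
            return supported;
    }

    int main(void)
    {
            bool members[3] = { true, false, true };
            printf("%d\n", any_member_supports_discard(members, 3)); /* 1 */
            return 0;
    }
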
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 611b5f797618..8034fbd6190c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -333,9 +333,10 @@ static void raid1_end_read_request(struct bio *bio, int error) | |||
| 333 | spin_unlock_irqrestore(&conf->device_lock, flags); | 333 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 334 | } | 334 | } |
| 335 | 335 | ||
| 336 | if (uptodate) | 336 | if (uptodate) { |
| 337 | raid_end_bio_io(r1_bio); | 337 | raid_end_bio_io(r1_bio); |
| 338 | else { | 338 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); |
| 339 | } else { | ||
| 339 | /* | 340 | /* |
| 340 | * oops, read error: | 341 | * oops, read error: |
| 341 | */ | 342 | */ |
| @@ -349,9 +350,8 @@ static void raid1_end_read_request(struct bio *bio, int error) | |||
| 349 | (unsigned long long)r1_bio->sector); | 350 | (unsigned long long)r1_bio->sector); |
| 350 | set_bit(R1BIO_ReadError, &r1_bio->state); | 351 | set_bit(R1BIO_ReadError, &r1_bio->state); |
| 351 | reschedule_retry(r1_bio); | 352 | reschedule_retry(r1_bio); |
| 353 | /* don't drop the reference on read_disk yet */ | ||
| 352 | } | 354 | } |
| 353 | |||
| 354 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | ||
| 355 | } | 355 | } |
| 356 | 356 | ||
| 357 | static void close_write(struct r1bio *r1_bio) | 357 | static void close_write(struct r1bio *r1_bio) |
| @@ -781,7 +781,12 @@ static void flush_pending_writes(struct r1conf *conf) | |||
| 781 | while (bio) { /* submit pending writes */ | 781 | while (bio) { /* submit pending writes */ |
| 782 | struct bio *next = bio->bi_next; | 782 | struct bio *next = bio->bi_next; |
| 783 | bio->bi_next = NULL; | 783 | bio->bi_next = NULL; |
| 784 | generic_make_request(bio); | 784 | if (unlikely((bio->bi_rw & REQ_DISCARD) && |
| 785 | !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) | ||
| 786 | /* Just ignore it */ | ||
| 787 | bio_endio(bio, 0); | ||
| 788 | else | ||
| 789 | generic_make_request(bio); | ||
| 785 | bio = next; | 790 | bio = next; |
| 786 | } | 791 | } |
| 787 | } else | 792 | } else |
| @@ -994,6 +999,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
| 994 | const int rw = bio_data_dir(bio); | 999 | const int rw = bio_data_dir(bio); |
| 995 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 1000 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
| 996 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 1001 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
| 1002 | const unsigned long do_discard = (bio->bi_rw | ||
| 1003 | & (REQ_DISCARD | REQ_SECURE)); | ||
| 997 | struct md_rdev *blocked_rdev; | 1004 | struct md_rdev *blocked_rdev; |
| 998 | struct blk_plug_cb *cb; | 1005 | struct blk_plug_cb *cb; |
| 999 | struct raid1_plug_cb *plug = NULL; | 1006 | struct raid1_plug_cb *plug = NULL; |
| @@ -1295,7 +1302,7 @@ read_again: | |||
| 1295 | conf->mirrors[i].rdev->data_offset); | 1302 | conf->mirrors[i].rdev->data_offset); |
| 1296 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1303 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
| 1297 | mbio->bi_end_io = raid1_end_write_request; | 1304 | mbio->bi_end_io = raid1_end_write_request; |
| 1298 | mbio->bi_rw = WRITE | do_flush_fua | do_sync; | 1305 | mbio->bi_rw = WRITE | do_flush_fua | do_sync | do_discard; |
| 1299 | mbio->bi_private = r1_bio; | 1306 | mbio->bi_private = r1_bio; |
| 1300 | 1307 | ||
| 1301 | atomic_inc(&r1_bio->remaining); | 1308 | atomic_inc(&r1_bio->remaining); |
| @@ -1549,6 +1556,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1549 | clear_bit(Unmerged, &rdev->flags); | 1556 | clear_bit(Unmerged, &rdev->flags); |
| 1550 | } | 1557 | } |
| 1551 | md_integrity_add_rdev(rdev, mddev); | 1558 | md_integrity_add_rdev(rdev, mddev); |
| 1559 | if (blk_queue_discard(bdev_get_queue(rdev->bdev))) | ||
| 1560 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | ||
| 1552 | print_conf(conf); | 1561 | print_conf(conf); |
| 1553 | return err; | 1562 | return err; |
| 1554 | } | 1563 | } |
| @@ -1867,7 +1876,7 @@ static int process_checks(struct r1bio *r1_bio) | |||
| 1867 | } else | 1876 | } else |
| 1868 | j = 0; | 1877 | j = 0; |
| 1869 | if (j >= 0) | 1878 | if (j >= 0) |
| 1870 | mddev->resync_mismatches += r1_bio->sectors; | 1879 | atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); |
| 1871 | if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) | 1880 | if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) |
| 1872 | && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { | 1881 | && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { |
| 1873 | /* No need to write to this device. */ | 1882 | /* No need to write to this device. */ |
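
resync_mismatches switches from a plain counter to atomic64_add here and in the raid10/raid5 hunks below, so concurrent completion paths can account mismatched sectors without holding a lock. A userspace sketch using C11 atomics as a stand-in for the kernel's atomic64_t:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t resync_mismatches;

static void account_mismatch(uint64_t sectors)
{
        /* kernel: atomic64_add(sectors, &mddev->resync_mismatches); */
        atomic_fetch_add_explicit(&resync_mismatches, sectors,
                                  memory_order_relaxed);
}

int main(void)
{
        account_mismatch(8);
        account_mismatch(8);
        printf("%llu\n",
               (unsigned long long)atomic_load(&resync_mismatches)); /* 16 */
        return 0;
}
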
| @@ -2220,6 +2229,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) | |||
| 2220 | unfreeze_array(conf); | 2229 | unfreeze_array(conf); |
| 2221 | } else | 2230 | } else |
| 2222 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); | 2231 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); |
| 2232 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); | ||
| 2223 | 2233 | ||
| 2224 | bio = r1_bio->bios[r1_bio->read_disk]; | 2234 | bio = r1_bio->bios[r1_bio->read_disk]; |
| 2225 | bdevname(bio->bi_bdev, b); | 2235 | bdevname(bio->bi_bdev, b); |
| @@ -2285,8 +2295,9 @@ read_more: | |||
| 2285 | } | 2295 | } |
| 2286 | } | 2296 | } |
| 2287 | 2297 | ||
| 2288 | static void raid1d(struct mddev *mddev) | 2298 | static void raid1d(struct md_thread *thread) |
| 2289 | { | 2299 | { |
| 2300 | struct mddev *mddev = thread->mddev; | ||
| 2290 | struct r1bio *r1_bio; | 2301 | struct r1bio *r1_bio; |
| 2291 | unsigned long flags; | 2302 | unsigned long flags; |
| 2292 | struct r1conf *conf = mddev->private; | 2303 | struct r1conf *conf = mddev->private; |
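
raid1d, raid10d, and raid5d all change from taking an mddev to taking the md_thread itself, each recovering its mddev as the first statement. A compilable stand-in showing the new callback shape (the struct layouts are illustrative, not the kernel's):

#include <stdio.h>

struct mddev { const char *name; };
struct md_thread {
        struct mddev *mddev;
        void (*run)(struct md_thread *thread);  /* new signature */
};

static void raid1d_like(struct md_thread *thread)
{
        struct mddev *mddev = thread->mddev;    /* first line of each daemon */
        printf("servicing %s\n", mddev->name);
}

int main(void)
{
        struct mddev md = { .name = "md0" };
        struct md_thread th = { .mddev = &md, .run = raid1d_like };
        th.run(&th);
        return 0;
}
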
| @@ -2783,6 +2794,7 @@ static int run(struct mddev *mddev) | |||
| 2783 | int i; | 2794 | int i; |
| 2784 | struct md_rdev *rdev; | 2795 | struct md_rdev *rdev; |
| 2785 | int ret; | 2796 | int ret; |
| 2797 | bool discard_supported = false; | ||
| 2786 | 2798 | ||
| 2787 | if (mddev->level != 1) { | 2799 | if (mddev->level != 1) { |
| 2788 | printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", | 2800 | printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", |
| @@ -2812,6 +2824,8 @@ static int run(struct mddev *mddev) | |||
| 2812 | continue; | 2824 | continue; |
| 2813 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 2825 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 2814 | rdev->data_offset << 9); | 2826 | rdev->data_offset << 9); |
| 2827 | if (blk_queue_discard(bdev_get_queue(rdev->bdev))) | ||
| 2828 | discard_supported = true; | ||
| 2815 | } | 2829 | } |
| 2816 | 2830 | ||
| 2817 | mddev->degraded = 0; | 2831 | mddev->degraded = 0; |
| @@ -2846,6 +2860,13 @@ static int run(struct mddev *mddev) | |||
| 2846 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2860 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
| 2847 | mddev->queue->backing_dev_info.congested_data = mddev; | 2861 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 2848 | blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); | 2862 | blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); |
| 2863 | |||
| 2864 | if (discard_supported) | ||
| 2865 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, | ||
| 2866 | mddev->queue); | ||
| 2867 | else | ||
| 2868 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, | ||
| 2869 | mddev->queue); | ||
| 2849 | } | 2870 | } |
| 2850 | 2871 | ||
| 2851 | ret = md_integrity_register(mddev); | 2872 | ret = md_integrity_register(mddev); |
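
Because unsupported discards are now quietly completed per member (flush_pending_writes above), raid1's run() can advertise QUEUE_FLAG_DISCARD as soon as any one mirror supports discard. A sketch of that derivation with stand-in types:

#include <stdbool.h>
#include <stdio.h>

struct member { bool discard; };

static bool array_discard_flag(const struct member *m, int n)
{
        for (int i = 0; i < n; i++)
                if (m[i].discard)
                        return true;    /* discard_supported = true */
        return false;
}

int main(void)
{
        struct member mirrors[2] = { { .discard = false },
                                     { .discard = true } };
        printf("QUEUE_FLAG_DISCARD: %d\n",
               array_discard_flag(mirrors, 2));         /* prints 1 */
        return 0;
}
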
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0138a727c1f3..906ccbd0f7dc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf) | |||
| 911 | while (bio) { /* submit pending writes */ | 911 | while (bio) { /* submit pending writes */ |
| 912 | struct bio *next = bio->bi_next; | 912 | struct bio *next = bio->bi_next; |
| 913 | bio->bi_next = NULL; | 913 | bio->bi_next = NULL; |
| 914 | generic_make_request(bio); | 914 | if (unlikely((bio->bi_rw & REQ_DISCARD) && |
| 915 | !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) | ||
| 916 | /* Just ignore it */ | ||
| 917 | bio_endio(bio, 0); | ||
| 918 | else | ||
| 919 | generic_make_request(bio); | ||
| 915 | bio = next; | 920 | bio = next; |
| 916 | } | 921 | } |
| 917 | } else | 922 | } else |
| @@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio, | |||
| 1050 | return rdev->new_data_offset; | 1055 | return rdev->new_data_offset; |
| 1051 | } | 1056 | } |
| 1052 | 1057 | ||
| 1058 | struct raid10_plug_cb { | ||
| 1059 | struct blk_plug_cb cb; | ||
| 1060 | struct bio_list pending; | ||
| 1061 | int pending_cnt; | ||
| 1062 | }; | ||
| 1063 | |||
| 1064 | static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) | ||
| 1065 | { | ||
| 1066 | struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb, | ||
| 1067 | cb); | ||
| 1068 | struct mddev *mddev = plug->cb.data; | ||
| 1069 | struct r10conf *conf = mddev->private; | ||
| 1070 | struct bio *bio; | ||
| 1071 | |||
| 1072 | if (from_schedule) { | ||
| 1073 | spin_lock_irq(&conf->device_lock); | ||
| 1074 | bio_list_merge(&conf->pending_bio_list, &plug->pending); | ||
| 1075 | conf->pending_count += plug->pending_cnt; | ||
| 1076 | spin_unlock_irq(&conf->device_lock); | ||
| 1077 | md_wakeup_thread(mddev->thread); | ||
| 1078 | kfree(plug); | ||
| 1079 | return; | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | /* we aren't scheduling, so we can do the write-out directly. */ | ||
| 1083 | bio = bio_list_get(&plug->pending); | ||
| 1084 | bitmap_unplug(mddev->bitmap); | ||
| 1085 | wake_up(&conf->wait_barrier); | ||
| 1086 | |||
| 1087 | while (bio) { /* submit pending writes */ | ||
| 1088 | struct bio *next = bio->bi_next; | ||
| 1089 | bio->bi_next = NULL; | ||
| 1090 | generic_make_request(bio); | ||
| 1091 | bio = next; | ||
| 1092 | } | ||
| 1093 | kfree(plug); | ||
| 1094 | } | ||
| 1095 | |||
| 1053 | static void make_request(struct mddev *mddev, struct bio * bio) | 1096 | static void make_request(struct mddev *mddev, struct bio * bio) |
| 1054 | { | 1097 | { |
| 1055 | struct r10conf *conf = mddev->private; | 1098 | struct r10conf *conf = mddev->private; |
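
raid10_unplug above forks on from_schedule: when the plug is flushed from inside the scheduler, the batch is spliced onto conf->pending_bio_list and raid10d is woken; otherwise the bios are written out on the spot. A userspace model of that split, with printf standing in for the submission and wakeup machinery:

#include <stdbool.h>
#include <stdio.h>

struct batch { int nr_bios; };

static void wake_daemon(struct batch *b)
{
        printf("deferred %d bios to raid10d\n", b->nr_bios);
}

static void submit_all(struct batch *b)
{
        printf("submitting %d bios directly\n", b->nr_bios);
}

static void unplug(struct batch *b, bool from_schedule)
{
        if (from_schedule) {
                /* atomic context: merge into the pending list and
                 * let the array's thread do the submission */
                wake_daemon(b);
                return;
        }
        /* process context: write out without a handoff */
        submit_all(b);
}

int main(void)
{
        struct batch b = { .nr_bios = 4 };
        unplug(&b, true);
        unplug(&b, false);
        return 0;
}
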
| @@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
| 1061 | const int rw = bio_data_dir(bio); | 1104 | const int rw = bio_data_dir(bio); |
| 1062 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 1105 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
| 1063 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 1106 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
| 1107 | const unsigned long do_discard = (bio->bi_rw | ||
| 1108 | & (REQ_DISCARD | REQ_SECURE)); | ||
| 1064 | unsigned long flags; | 1109 | unsigned long flags; |
| 1065 | struct md_rdev *blocked_rdev; | 1110 | struct md_rdev *blocked_rdev; |
| 1111 | struct blk_plug_cb *cb; | ||
| 1112 | struct raid10_plug_cb *plug = NULL; | ||
| 1066 | int sectors_handled; | 1113 | int sectors_handled; |
| 1067 | int max_sectors; | 1114 | int max_sectors; |
| 1068 | int sectors; | 1115 | int sectors; |
| @@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
| 1081 | || conf->prev.near_copies < conf->prev.raid_disks))) { | 1128 | || conf->prev.near_copies < conf->prev.raid_disks))) { |
| 1082 | struct bio_pair *bp; | 1129 | struct bio_pair *bp; |
| 1083 | /* Sanity check -- queue functions should prevent this happening */ | 1130 | /* Sanity check -- queue functions should prevent this happening */ |
| 1084 | if (bio->bi_vcnt != 1 || | 1131 | if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || |
| 1085 | bio->bi_idx != 0) | 1132 | bio->bi_idx != 0) |
| 1086 | goto bad_map; | 1133 | goto bad_map; |
| 1087 | /* This is a one page bio that upper layers | 1134 | /* This is a one page bio that upper layers |
| @@ -1410,15 +1457,26 @@ retry_write: | |||
| 1410 | conf->mirrors[d].rdev)); | 1457 | conf->mirrors[d].rdev)); |
| 1411 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 1458 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
| 1412 | mbio->bi_end_io = raid10_end_write_request; | 1459 | mbio->bi_end_io = raid10_end_write_request; |
| 1413 | mbio->bi_rw = WRITE | do_sync | do_fua; | 1460 | mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; |
| 1414 | mbio->bi_private = r10_bio; | 1461 | mbio->bi_private = r10_bio; |
| 1415 | 1462 | ||
| 1416 | atomic_inc(&r10_bio->remaining); | 1463 | atomic_inc(&r10_bio->remaining); |
| 1464 | |||
| 1465 | cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug)); | ||
| 1466 | if (cb) | ||
| 1467 | plug = container_of(cb, struct raid10_plug_cb, cb); | ||
| 1468 | else | ||
| 1469 | plug = NULL; | ||
| 1417 | spin_lock_irqsave(&conf->device_lock, flags); | 1470 | spin_lock_irqsave(&conf->device_lock, flags); |
| 1418 | bio_list_add(&conf->pending_bio_list, mbio); | 1471 | if (plug) { |
| 1419 | conf->pending_count++; | 1472 | bio_list_add(&plug->pending, mbio); |
| 1473 | plug->pending_cnt++; | ||
| 1474 | } else { | ||
| 1475 | bio_list_add(&conf->pending_bio_list, mbio); | ||
| 1476 | conf->pending_count++; | ||
| 1477 | } | ||
| 1420 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1478 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 1421 | if (!mddev_check_plugged(mddev)) | 1479 | if (!plug) |
| 1422 | md_wakeup_thread(mddev->thread); | 1480 | md_wakeup_thread(mddev->thread); |
| 1423 | 1481 | ||
| 1424 | if (!r10_bio->devs[i].repl_bio) | 1482 | if (!r10_bio->devs[i].repl_bio) |
| @@ -1439,7 +1497,7 @@ retry_write: | |||
| 1439 | conf->mirrors[d].replacement)); | 1497 | conf->mirrors[d].replacement)); |
| 1440 | mbio->bi_bdev = conf->mirrors[d].replacement->bdev; | 1498 | mbio->bi_bdev = conf->mirrors[d].replacement->bdev; |
| 1441 | mbio->bi_end_io = raid10_end_write_request; | 1499 | mbio->bi_end_io = raid10_end_write_request; |
| 1442 | mbio->bi_rw = WRITE | do_sync | do_fua; | 1500 | mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; |
| 1443 | mbio->bi_private = r10_bio; | 1501 | mbio->bi_private = r10_bio; |
| 1444 | 1502 | ||
| 1445 | atomic_inc(&r10_bio->remaining); | 1503 | atomic_inc(&r10_bio->remaining); |
| @@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev) | |||
| 1638 | && !test_bit(Faulty, &tmp->rdev->flags) | 1696 | && !test_bit(Faulty, &tmp->rdev->flags) |
| 1639 | && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { | 1697 | && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { |
| 1640 | count++; | 1698 | count++; |
| 1641 | sysfs_notify_dirent(tmp->rdev->sysfs_state); | 1699 | sysfs_notify_dirent_safe(tmp->rdev->sysfs_state); |
| 1642 | } | 1700 | } |
| 1643 | } | 1701 | } |
| 1644 | spin_lock_irqsave(&conf->device_lock, flags); | 1702 | spin_lock_irqsave(&conf->device_lock, flags); |
| @@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1725 | clear_bit(Unmerged, &rdev->flags); | 1783 | clear_bit(Unmerged, &rdev->flags); |
| 1726 | } | 1784 | } |
| 1727 | md_integrity_add_rdev(rdev, mddev); | 1785 | md_integrity_add_rdev(rdev, mddev); |
| 1786 | if (blk_queue_discard(bdev_get_queue(rdev->bdev))) | ||
| 1787 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | ||
| 1788 | |||
| 1728 | print_conf(conf); | 1789 | print_conf(conf); |
| 1729 | return err; | 1790 | return err; |
| 1730 | } | 1791 | } |
| @@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) | |||
| 1952 | break; | 2013 | break; |
| 1953 | if (j == vcnt) | 2014 | if (j == vcnt) |
| 1954 | continue; | 2015 | continue; |
| 1955 | mddev->resync_mismatches += r10_bio->sectors; | 2016 | atomic64_add(r10_bio->sectors, &mddev->resync_mismatches); |
| 1956 | if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | 2017 | if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) |
| 1957 | /* Don't fix anything. */ | 2018 | /* Don't fix anything. */ |
| 1958 | continue; | 2019 | continue; |
| @@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) | |||
| 2673 | } | 2734 | } |
| 2674 | } | 2735 | } |
| 2675 | 2736 | ||
| 2676 | static void raid10d(struct mddev *mddev) | 2737 | static void raid10d(struct md_thread *thread) |
| 2677 | { | 2738 | { |
| 2739 | struct mddev *mddev = thread->mddev; | ||
| 2678 | struct r10bio *r10_bio; | 2740 | struct r10bio *r10_bio; |
| 2679 | unsigned long flags; | 2741 | unsigned long flags; |
| 2680 | struct r10conf *conf = mddev->private; | 2742 | struct r10conf *conf = mddev->private; |
| @@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
| 3158 | else { | 3220 | else { |
| 3159 | bad_sectors -= (sector - first_bad); | 3221 | bad_sectors -= (sector - first_bad); |
| 3160 | if (max_sync > bad_sectors) | 3222 | if (max_sync > bad_sectors) |
| 3161 | max_sync = max_sync; | 3223 | max_sync = bad_sectors; |
| 3162 | continue; | 3224 | continue; |
| 3163 | } | 3225 | } |
| 3164 | } | 3226 | } |
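
The one-character hunk above replaces a no-op self-assignment with the intended clamp of max_sync to the remaining bad-sector run. The corrected logic in isolation:

#include <stdio.h>

static unsigned long clamp_sync(unsigned long max_sync,
                                unsigned long bad_sectors)
{
        if (max_sync > bad_sectors)
                max_sync = bad_sectors;         /* was: max_sync = max_sync */
        return max_sync;
}

int main(void)
{
        printf("%lu\n", clamp_sync(1024, 128)); /* 128 */
        return 0;
}
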
| @@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev) | |||
| 3482 | sector_t size; | 3544 | sector_t size; |
| 3483 | sector_t min_offset_diff = 0; | 3545 | sector_t min_offset_diff = 0; |
| 3484 | int first = 1; | 3546 | int first = 1; |
| 3547 | bool discard_supported = false; | ||
| 3485 | 3548 | ||
| 3486 | if (mddev->private == NULL) { | 3549 | if (mddev->private == NULL) { |
| 3487 | conf = setup_conf(mddev); | 3550 | conf = setup_conf(mddev); |
| @@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev) | |||
| 3498 | 3561 | ||
| 3499 | chunk_size = mddev->chunk_sectors << 9; | 3562 | chunk_size = mddev->chunk_sectors << 9; |
| 3500 | if (mddev->queue) { | 3563 | if (mddev->queue) { |
| 3564 | blk_queue_max_discard_sectors(mddev->queue, | ||
| 3565 | mddev->chunk_sectors); | ||
| 3501 | blk_queue_io_min(mddev->queue, chunk_size); | 3566 | blk_queue_io_min(mddev->queue, chunk_size); |
| 3502 | if (conf->geo.raid_disks % conf->geo.near_copies) | 3567 | if (conf->geo.raid_disks % conf->geo.near_copies) |
| 3503 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); | 3568 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
| @@ -3543,8 +3608,16 @@ static int run(struct mddev *mddev) | |||
| 3543 | rdev->data_offset << 9); | 3608 | rdev->data_offset << 9); |
| 3544 | 3609 | ||
| 3545 | disk->head_position = 0; | 3610 | disk->head_position = 0; |
| 3611 | |||
| 3612 | if (blk_queue_discard(bdev_get_queue(rdev->bdev))) | ||
| 3613 | discard_supported = true; | ||
| 3546 | } | 3614 | } |
| 3547 | 3615 | ||
| 3616 | if (discard_supported) | ||
| 3617 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | ||
| 3618 | else | ||
| 3619 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); | ||
| 3620 | |||
| 3548 | /* need to check that every block has at least one working mirror */ | 3621 | /* need to check that every block has at least one working mirror */ |
| 3549 | if (!enough(conf, -1)) { | 3622 | if (!enough(conf, -1)) { |
| 3550 | printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", | 3623 | printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0689173fd9f5..c5439dce0295 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -551,6 +551,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
| 551 | rw = WRITE_FUA; | 551 | rw = WRITE_FUA; |
| 552 | else | 552 | else |
| 553 | rw = WRITE; | 553 | rw = WRITE; |
| 554 | if (test_bit(R5_Discard, &sh->dev[i].flags)) | ||
| 555 | rw |= REQ_DISCARD; | ||
| 554 | } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | 556 | } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) |
| 555 | rw = READ; | 557 | rw = READ; |
| 556 | else if (test_and_clear_bit(R5_WantReplace, | 558 | else if (test_and_clear_bit(R5_WantReplace, |
| @@ -1174,8 +1176,11 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
| 1174 | set_bit(R5_WantFUA, &dev->flags); | 1176 | set_bit(R5_WantFUA, &dev->flags); |
| 1175 | if (wbi->bi_rw & REQ_SYNC) | 1177 | if (wbi->bi_rw & REQ_SYNC) |
| 1176 | set_bit(R5_SyncIO, &dev->flags); | 1178 | set_bit(R5_SyncIO, &dev->flags); |
| 1177 | tx = async_copy_data(1, wbi, dev->page, | 1179 | if (wbi->bi_rw & REQ_DISCARD) |
| 1178 | dev->sector, tx); | 1180 | set_bit(R5_Discard, &dev->flags); |
| 1181 | else | ||
| 1182 | tx = async_copy_data(1, wbi, dev->page, | ||
| 1183 | dev->sector, tx); | ||
| 1179 | wbi = r5_next_bio(wbi, dev->sector); | 1184 | wbi = r5_next_bio(wbi, dev->sector); |
| 1180 | } | 1185 | } |
| 1181 | } | 1186 | } |
| @@ -1191,7 +1196,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
| 1191 | int pd_idx = sh->pd_idx; | 1196 | int pd_idx = sh->pd_idx; |
| 1192 | int qd_idx = sh->qd_idx; | 1197 | int qd_idx = sh->qd_idx; |
| 1193 | int i; | 1198 | int i; |
| 1194 | bool fua = false, sync = false; | 1199 | bool fua = false, sync = false, discard = false; |
| 1195 | 1200 | ||
| 1196 | pr_debug("%s: stripe %llu\n", __func__, | 1201 | pr_debug("%s: stripe %llu\n", __func__, |
| 1197 | (unsigned long long)sh->sector); | 1202 | (unsigned long long)sh->sector); |
| @@ -1199,13 +1204,15 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
| 1199 | for (i = disks; i--; ) { | 1204 | for (i = disks; i--; ) { |
| 1200 | fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); | 1205 | fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); |
| 1201 | sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); | 1206 | sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); |
| 1207 | discard |= test_bit(R5_Discard, &sh->dev[i].flags); | ||
| 1202 | } | 1208 | } |
| 1203 | 1209 | ||
| 1204 | for (i = disks; i--; ) { | 1210 | for (i = disks; i--; ) { |
| 1205 | struct r5dev *dev = &sh->dev[i]; | 1211 | struct r5dev *dev = &sh->dev[i]; |
| 1206 | 1212 | ||
| 1207 | if (dev->written || i == pd_idx || i == qd_idx) { | 1213 | if (dev->written || i == pd_idx || i == qd_idx) { |
| 1208 | set_bit(R5_UPTODATE, &dev->flags); | 1214 | if (!discard) |
| 1215 | set_bit(R5_UPTODATE, &dev->flags); | ||
| 1209 | if (fua) | 1216 | if (fua) |
| 1210 | set_bit(R5_WantFUA, &dev->flags); | 1217 | set_bit(R5_WantFUA, &dev->flags); |
| 1211 | if (sync) | 1218 | if (sync) |
| @@ -1241,6 +1248,18 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, | |||
| 1241 | pr_debug("%s: stripe %llu\n", __func__, | 1248 | pr_debug("%s: stripe %llu\n", __func__, |
| 1242 | (unsigned long long)sh->sector); | 1249 | (unsigned long long)sh->sector); |
| 1243 | 1250 | ||
| 1251 | for (i = 0; i < sh->disks; i++) { | ||
| 1252 | if (pd_idx == i) | ||
| 1253 | continue; | ||
| 1254 | if (!test_bit(R5_Discard, &sh->dev[i].flags)) | ||
| 1255 | break; | ||
| 1256 | } | ||
| 1257 | if (i >= sh->disks) { | ||
| 1258 | atomic_inc(&sh->count); | ||
| 1259 | set_bit(R5_Discard, &sh->dev[pd_idx].flags); | ||
| 1260 | ops_complete_reconstruct(sh); | ||
| 1261 | return; | ||
| 1262 | } | ||
| 1244 | /* check if prexor is active which means only process blocks | 1263 | /* check if prexor is active which means only process blocks |
| 1245 | * that are part of a read-modify-write (written) | 1264 | * that are part of a read-modify-write (written) |
| 1246 | */ | 1265 | */ |
| @@ -1285,10 +1304,24 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, | |||
| 1285 | { | 1304 | { |
| 1286 | struct async_submit_ctl submit; | 1305 | struct async_submit_ctl submit; |
| 1287 | struct page **blocks = percpu->scribble; | 1306 | struct page **blocks = percpu->scribble; |
| 1288 | int count; | 1307 | int count, i; |
| 1289 | 1308 | ||
| 1290 | pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); | 1309 | pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); |
| 1291 | 1310 | ||
| 1311 | for (i = 0; i < sh->disks; i++) { | ||
| 1312 | if (sh->pd_idx == i || sh->qd_idx == i) | ||
| 1313 | continue; | ||
| 1314 | if (!test_bit(R5_Discard, &sh->dev[i].flags)) | ||
| 1315 | break; | ||
| 1316 | } | ||
| 1317 | if (i >= sh->disks) { | ||
| 1318 | atomic_inc(&sh->count); | ||
| 1319 | set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); | ||
| 1320 | set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); | ||
| 1321 | ops_complete_reconstruct(sh); | ||
| 1322 | return; | ||
| 1323 | } | ||
| 1324 | |||
| 1292 | count = set_syndrome_sources(blocks, sh); | 1325 | count = set_syndrome_sources(blocks, sh); |
| 1293 | 1326 | ||
| 1294 | atomic_inc(&sh->count); | 1327 | atomic_inc(&sh->count); |
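
Both reconstruct paths gain the same early-out: scan the data slots, and only if every one carries R5_Discard, mark the parity slot(s) discarded and complete without running the XOR/syndrome machinery. A self-contained sketch of that check (array size and slot indices are arbitrary):

#include <stdbool.h>
#include <stdio.h>

#define NDISKS 5

struct dev { bool discard; };

static bool whole_stripe_discard(struct dev *dev, int pd, int qd)
{
        for (int i = 0; i < NDISKS; i++) {
                if (i == pd || i == qd)
                        continue;               /* skip parity slots */
                if (!dev[i].discard)
                        return false;           /* partial: compute parity */
        }
        dev[pd].discard = true;                 /* discard P */
        if (qd >= 0)
                dev[qd].discard = true;         /* and Q for RAID6 */
        return true;
}

int main(void)
{
        struct dev d[NDISKS] = {
                { true }, { true }, { true }, { false }, { false }
        };      /* slots 3 and 4 are P and Q */
        printf("%d\n", whole_stripe_discard(d, 3, 4));  /* 1 */
        return 0;
}
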
| @@ -2408,11 +2441,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
| 2408 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) | 2441 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) |
| 2409 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); | 2442 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); |
| 2410 | } | 2443 | } |
| 2411 | spin_unlock_irq(&sh->stripe_lock); | ||
| 2412 | 2444 | ||
| 2413 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", | 2445 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", |
| 2414 | (unsigned long long)(*bip)->bi_sector, | 2446 | (unsigned long long)(*bip)->bi_sector, |
| 2415 | (unsigned long long)sh->sector, dd_idx); | 2447 | (unsigned long long)sh->sector, dd_idx); |
| 2448 | spin_unlock_irq(&sh->stripe_lock); | ||
| 2416 | 2449 | ||
| 2417 | if (conf->mddev->bitmap && firstwrite) { | 2450 | if (conf->mddev->bitmap && firstwrite) { |
| 2418 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | 2451 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, |
| @@ -2479,10 +2512,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
| 2479 | bi = sh->dev[i].towrite; | 2512 | bi = sh->dev[i].towrite; |
| 2480 | sh->dev[i].towrite = NULL; | 2513 | sh->dev[i].towrite = NULL; |
| 2481 | spin_unlock_irq(&sh->stripe_lock); | 2514 | spin_unlock_irq(&sh->stripe_lock); |
| 2482 | if (bi) { | 2515 | if (bi) |
| 2483 | s->to_write--; | ||
| 2484 | bitmap_end = 1; | 2516 | bitmap_end = 1; |
| 2485 | } | ||
| 2486 | 2517 | ||
| 2487 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | 2518 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) |
| 2488 | wake_up(&conf->wait_for_overlap); | 2519 | wake_up(&conf->wait_for_overlap); |
| @@ -2524,11 +2555,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
| 2524 | if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && | 2555 | if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && |
| 2525 | (!test_bit(R5_Insync, &sh->dev[i].flags) || | 2556 | (!test_bit(R5_Insync, &sh->dev[i].flags) || |
| 2526 | test_bit(R5_ReadError, &sh->dev[i].flags))) { | 2557 | test_bit(R5_ReadError, &sh->dev[i].flags))) { |
| 2558 | spin_lock_irq(&sh->stripe_lock); | ||
| 2527 | bi = sh->dev[i].toread; | 2559 | bi = sh->dev[i].toread; |
| 2528 | sh->dev[i].toread = NULL; | 2560 | sh->dev[i].toread = NULL; |
| 2561 | spin_unlock_irq(&sh->stripe_lock); | ||
| 2529 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | 2562 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) |
| 2530 | wake_up(&conf->wait_for_overlap); | 2563 | wake_up(&conf->wait_for_overlap); |
| 2531 | if (bi) s->to_read--; | ||
| 2532 | while (bi && bi->bi_sector < | 2564 | while (bi && bi->bi_sector < |
| 2533 | sh->dev[i].sector + STRIPE_SECTORS) { | 2565 | sh->dev[i].sector + STRIPE_SECTORS) { |
| 2534 | struct bio *nextbi = | 2566 | struct bio *nextbi = |
| @@ -2741,7 +2773,8 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
| 2741 | if (sh->dev[i].written) { | 2773 | if (sh->dev[i].written) { |
| 2742 | dev = &sh->dev[i]; | 2774 | dev = &sh->dev[i]; |
| 2743 | if (!test_bit(R5_LOCKED, &dev->flags) && | 2775 | if (!test_bit(R5_LOCKED, &dev->flags) && |
| 2744 | test_bit(R5_UPTODATE, &dev->flags)) { | 2776 | (test_bit(R5_UPTODATE, &dev->flags) || |
| 2777 | test_and_clear_bit(R5_Discard, &dev->flags))) { | ||
| 2745 | /* We can return any write requests */ | 2778 | /* We can return any write requests */ |
| 2746 | struct bio *wbi, *wbi2; | 2779 | struct bio *wbi, *wbi2; |
| 2747 | pr_debug("Return write for disc %d\n", i); | 2780 | pr_debug("Return write for disc %d\n", i); |
| @@ -2775,12 +2808,25 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
| 2775 | int disks) | 2808 | int disks) |
| 2776 | { | 2809 | { |
| 2777 | int rmw = 0, rcw = 0, i; | 2810 | int rmw = 0, rcw = 0, i; |
| 2778 | if (conf->max_degraded == 2) { | 2811 | sector_t recovery_cp = conf->mddev->recovery_cp; |
| 2779 | /* RAID6 requires 'rcw' in current implementation | 2812 | |
| 2780 | * Calculate the real rcw later - for now fake it | 2813 | /* RAID6 requires 'rcw' in current implementation. |
| 2814 | * Otherwise, check whether resync is now happening or should start. | ||
| 2815 | * If yes, then the array is dirty (after unclean shutdown or | ||
| 2816 | * initial creation), so parity in some stripes might be inconsistent. | ||
| 2817 | * In this case, we need to always do reconstruct-write, to ensure | ||
| 2818 | * that in case of drive failure or read-error correction, we | ||
| 2819 | * generate correct data from the parity. | ||
| 2820 | */ | ||
| 2821 | if (conf->max_degraded == 2 || | ||
| 2822 | (recovery_cp < MaxSector && sh->sector >= recovery_cp)) { | ||
| 2823 | /* Calculate the real rcw later - for now make it | ||
| 2781 | * look like rcw is cheaper | 2824 | * look like rcw is cheaper |
| 2782 | */ | 2825 | */ |
| 2783 | rcw = 1; rmw = 2; | 2826 | rcw = 1; rmw = 2; |
| 2827 | pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n", | ||
| 2828 | conf->max_degraded, (unsigned long long)recovery_cp, | ||
| 2829 | (unsigned long long)sh->sector); | ||
| 2784 | } else for (i = disks; i--; ) { | 2830 | } else for (i = disks; i--; ) { |
| 2785 | /* would I have to read this buffer for read_modify_write */ | 2831 | /* would I have to read this buffer for read_modify_write */ |
| 2786 | struct r5dev *dev = &sh->dev[i]; | 2832 | struct r5dev *dev = &sh->dev[i]; |
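
The enlarged comment explains the new condition; reduced to a predicate, the decision is: RAID6 always takes reconstruct-write, and so does any stripe at or past recovery_cp while a resync is outstanding, since its parity cannot be trusted for read-modify-write. A minimal illustration with stand-in constants:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SECTOR UINT64_MAX   /* stand-in for the kernel's MaxSector */

static bool must_force_rcw(int max_degraded, uint64_t recovery_cp,
                           uint64_t sh_sector)
{
        return max_degraded == 2 ||                  /* RAID6 always rcw */
               (recovery_cp < MAX_SECTOR &&          /* resync pending */
                sh_sector >= recovery_cp);           /* stripe not yet synced */
}

int main(void)
{
        printf("%d\n", must_force_rcw(1, 1000, 2048)); /* 1: stale parity */
        printf("%d\n", must_force_rcw(1, 1000, 512));  /* 0: already synced */
        return 0;
}
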
| @@ -2932,7 +2978,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, | |||
| 2932 | */ | 2978 | */ |
| 2933 | set_bit(STRIPE_INSYNC, &sh->state); | 2979 | set_bit(STRIPE_INSYNC, &sh->state); |
| 2934 | else { | 2980 | else { |
| 2935 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | 2981 | atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); |
| 2936 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | 2982 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) |
| 2937 | /* don't try to repair!! */ | 2983 | /* don't try to repair!! */ |
| 2938 | set_bit(STRIPE_INSYNC, &sh->state); | 2984 | set_bit(STRIPE_INSYNC, &sh->state); |
| @@ -3084,7 +3130,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, | |||
| 3084 | */ | 3130 | */ |
| 3085 | } | 3131 | } |
| 3086 | } else { | 3132 | } else { |
| 3087 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | 3133 | atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); |
| 3088 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | 3134 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) |
| 3089 | /* don't try to repair!! */ | 3135 | /* don't try to repair!! */ |
| 3090 | set_bit(STRIPE_INSYNC, &sh->state); | 3136 | set_bit(STRIPE_INSYNC, &sh->state); |
| @@ -3459,10 +3505,12 @@ static void handle_stripe(struct stripe_head *sh) | |||
| 3459 | if (s.written && | 3505 | if (s.written && |
| 3460 | (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) | 3506 | (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) |
| 3461 | && !test_bit(R5_LOCKED, &pdev->flags) | 3507 | && !test_bit(R5_LOCKED, &pdev->flags) |
| 3462 | && test_bit(R5_UPTODATE, &pdev->flags)))) && | 3508 | && (test_bit(R5_UPTODATE, &pdev->flags) || |
| 3509 | test_bit(R5_Discard, &pdev->flags))))) && | ||
| 3463 | (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) | 3510 | (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) |
| 3464 | && !test_bit(R5_LOCKED, &qdev->flags) | 3511 | && !test_bit(R5_LOCKED, &qdev->flags) |
| 3465 | && test_bit(R5_UPTODATE, &qdev->flags))))) | 3512 | && (test_bit(R5_UPTODATE, &qdev->flags) || |
| 3513 | test_bit(R5_Discard, &qdev->flags)))))) | ||
| 3466 | handle_stripe_clean_event(conf, sh, disks, &s.return_bi); | 3514 | handle_stripe_clean_event(conf, sh, disks, &s.return_bi); |
| 3467 | 3515 | ||
| 3468 | /* Now we might consider reading some blocks, either to check/generate | 3516 | /* Now we might consider reading some blocks, either to check/generate |
| @@ -3489,9 +3537,11 @@ static void handle_stripe(struct stripe_head *sh) | |||
| 3489 | /* All the 'written' buffers and the parity block are ready to | 3537 | /* All the 'written' buffers and the parity block are ready to |
| 3490 | * be written back to disk | 3538 | * be written back to disk |
| 3491 | */ | 3539 | */ |
| 3492 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); | 3540 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) && |
| 3541 | !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)); | ||
| 3493 | BUG_ON(sh->qd_idx >= 0 && | 3542 | BUG_ON(sh->qd_idx >= 0 && |
| 3494 | !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags)); | 3543 | !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) && |
| 3544 | !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags)); | ||
| 3495 | for (i = disks; i--; ) { | 3545 | for (i = disks; i--; ) { |
| 3496 | struct r5dev *dev = &sh->dev[i]; | 3546 | struct r5dev *dev = &sh->dev[i]; |
| 3497 | if (test_bit(R5_LOCKED, &dev->flags) && | 3547 | if (test_bit(R5_LOCKED, &dev->flags) && |
| @@ -4072,6 +4122,88 @@ static void release_stripe_plug(struct mddev *mddev, | |||
| 4072 | release_stripe(sh); | 4122 | release_stripe(sh); |
| 4073 | } | 4123 | } |
| 4074 | 4124 | ||
| 4125 | static void make_discard_request(struct mddev *mddev, struct bio *bi) | ||
| 4126 | { | ||
| 4127 | struct r5conf *conf = mddev->private; | ||
| 4128 | sector_t logical_sector, last_sector; | ||
| 4129 | struct stripe_head *sh; | ||
| 4130 | int remaining; | ||
| 4131 | int stripe_sectors; | ||
| 4132 | |||
| 4133 | if (mddev->reshape_position != MaxSector) | ||
| 4134 | /* Skip discard while reshape is happening */ | ||
| 4135 | return; | ||
| 4136 | |||
| 4137 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | ||
| 4138 | last_sector = bi->bi_sector + (bi->bi_size>>9); | ||
| 4139 | |||
| 4140 | bi->bi_next = NULL; | ||
| 4141 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | ||
| 4142 | |||
| 4143 | stripe_sectors = conf->chunk_sectors * | ||
| 4144 | (conf->raid_disks - conf->max_degraded); | ||
| 4145 | logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector, | ||
| 4146 | stripe_sectors); | ||
| 4147 | sector_div(last_sector, stripe_sectors); | ||
| 4148 | |||
| 4149 | logical_sector *= conf->chunk_sectors; | ||
| 4150 | last_sector *= conf->chunk_sectors; | ||
| 4151 | |||
| 4152 | for (; logical_sector < last_sector; | ||
| 4153 | logical_sector += STRIPE_SECTORS) { | ||
| 4154 | DEFINE_WAIT(w); | ||
| 4155 | int d; | ||
| 4156 | again: | ||
| 4157 | sh = get_active_stripe(conf, logical_sector, 0, 0, 0); | ||
| 4158 | prepare_to_wait(&conf->wait_for_overlap, &w, | ||
| 4159 | TASK_UNINTERRUPTIBLE); | ||
| 4160 | spin_lock_irq(&sh->stripe_lock); | ||
| 4161 | for (d = 0; d < conf->raid_disks; d++) { | ||
| 4162 | if (d == sh->pd_idx || d == sh->qd_idx) | ||
| 4163 | continue; | ||
| 4164 | if (sh->dev[d].towrite || sh->dev[d].toread) { | ||
| 4165 | set_bit(R5_Overlap, &sh->dev[d].flags); | ||
| 4166 | spin_unlock_irq(&sh->stripe_lock); | ||
| 4167 | release_stripe(sh); | ||
| 4168 | schedule(); | ||
| 4169 | goto again; | ||
| 4170 | } | ||
| 4171 | } | ||
| 4172 | finish_wait(&conf->wait_for_overlap, &w); | ||
| 4173 | for (d = 0; d < conf->raid_disks; d++) { | ||
| 4174 | if (d == sh->pd_idx || d == sh->qd_idx) | ||
| 4175 | continue; | ||
| 4176 | sh->dev[d].towrite = bi; | ||
| 4177 | set_bit(R5_OVERWRITE, &sh->dev[d].flags); | ||
| 4178 | raid5_inc_bi_active_stripes(bi); | ||
| 4179 | } | ||
| 4180 | spin_unlock_irq(&sh->stripe_lock); | ||
| 4181 | if (conf->mddev->bitmap) { | ||
| 4182 | for (d = 0; | ||
| 4183 | d < conf->raid_disks - conf->max_degraded; | ||
| 4184 | d++) | ||
| 4185 | bitmap_startwrite(mddev->bitmap, | ||
| 4186 | sh->sector, | ||
| 4187 | STRIPE_SECTORS, | ||
| 4188 | 0); | ||
| 4189 | sh->bm_seq = conf->seq_flush + 1; | ||
| 4190 | set_bit(STRIPE_BIT_DELAY, &sh->state); | ||
| 4191 | } | ||
| 4192 | |||
| 4193 | set_bit(STRIPE_HANDLE, &sh->state); | ||
| 4194 | clear_bit(STRIPE_DELAYED, &sh->state); | ||
| 4195 | if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | ||
| 4196 | atomic_inc(&conf->preread_active_stripes); | ||
| 4197 | release_stripe_plug(mddev, sh); | ||
| 4198 | } | ||
| 4199 | |||
| 4200 | remaining = raid5_dec_bi_active_stripes(bi); | ||
| 4201 | if (remaining == 0) { | ||
| 4202 | md_write_end(mddev); | ||
| 4203 | bio_endio(bi, 0); | ||
| 4204 | } | ||
| 4205 | } | ||
| 4206 | |||
| 4075 | static void make_request(struct mddev *mddev, struct bio * bi) | 4207 | static void make_request(struct mddev *mddev, struct bio * bi) |
| 4076 | { | 4208 | { |
| 4077 | struct r5conf *conf = mddev->private; | 4209 | struct r5conf *conf = mddev->private; |
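
make_discard_request only ever trims whole stripes: the start sector is rounded up and the end sector rounded down to a multiple of chunk_sectors * data_disks, then both are converted back to per-disk chunk units for the stripe walk, so a partial stripe at either edge is simply skipped. The arithmetic in isolation, with DIV_ROUND_UP_SECTOR_T and sector_div replaced by plain 64-bit division:

#include <stdint.h>
#include <stdio.h>

static void stripe_clip(uint64_t start, uint64_t end,
                        uint64_t chunk_sectors, int data_disks,
                        uint64_t *first, uint64_t *last)
{
        uint64_t stripe_sectors = chunk_sectors * data_disks;

        *first = (start + stripe_sectors - 1) / stripe_sectors; /* round up */
        *last  = end / stripe_sectors;                          /* round down */
        *first *= chunk_sectors;  /* back to per-disk chunk units, as above */
        *last  *= chunk_sectors;
}

int main(void)
{
        uint64_t first, last;
        /* 128-sector chunks, 4 data disks => 512-sector stripes */
        stripe_clip(100, 1200, 128, 4, &first, &last);
        printf("first=%llu last=%llu\n",
               (unsigned long long)first, (unsigned long long)last);
        /* first=128 last=256: only the fully covered stripe is trimmed */
        return 0;
}
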
| @@ -4094,6 +4226,11 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
| 4094 | chunk_aligned_read(mddev,bi)) | 4226 | chunk_aligned_read(mddev,bi)) |
| 4095 | return; | 4227 | return; |
| 4096 | 4228 | ||
| 4229 | if (unlikely(bi->bi_rw & REQ_DISCARD)) { | ||
| 4230 | make_discard_request(mddev, bi); | ||
| 4231 | return; | ||
| 4232 | } | ||
| 4233 | |||
| 4097 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 4234 | logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
| 4098 | last_sector = bi->bi_sector + (bi->bi_size>>9); | 4235 | last_sector = bi->bi_sector + (bi->bi_size>>9); |
| 4099 | bi->bi_next = NULL; | 4236 | bi->bi_next = NULL; |
| @@ -4630,8 +4767,9 @@ static int handle_active_stripes(struct r5conf *conf) | |||
| 4630 | * During the scan, completed stripes are saved for us by the interrupt | 4767 | * During the scan, completed stripes are saved for us by the interrupt |
| 4631 | * handler, so that they will not have to wait for our next wakeup. | 4768 | * handler, so that they will not have to wait for our next wakeup. |
| 4632 | */ | 4769 | */ |
| 4633 | static void raid5d(struct mddev *mddev) | 4770 | static void raid5d(struct md_thread *thread) |
| 4634 | { | 4771 | { |
| 4772 | struct mddev *mddev = thread->mddev; | ||
| 4635 | struct r5conf *conf = mddev->private; | 4773 | struct r5conf *conf = mddev->private; |
| 4636 | int handled; | 4774 | int handled; |
| 4637 | struct blk_plug plug; | 4775 | struct blk_plug plug; |
| @@ -5366,6 +5504,7 @@ static int run(struct mddev *mddev) | |||
| 5366 | 5504 | ||
| 5367 | if (mddev->queue) { | 5505 | if (mddev->queue) { |
| 5368 | int chunk_size; | 5506 | int chunk_size; |
| 5507 | bool discard_supported = true; | ||
| 5369 | /* read-ahead size must cover two whole stripes, which | 5508 | /* read-ahead size must cover two whole stripes, which |
| 5370 | * is 2 * (datadisks) * chunksize where 'n' is the | 5509 | * is 2 * (datadisks) * chunksize where 'n' is the |
| 5371 | * number of raid devices | 5510 | * number of raid devices |
| @@ -5385,13 +5524,48 @@ static int run(struct mddev *mddev) | |||
| 5385 | blk_queue_io_min(mddev->queue, chunk_size); | 5524 | blk_queue_io_min(mddev->queue, chunk_size); |
| 5386 | blk_queue_io_opt(mddev->queue, chunk_size * | 5525 | blk_queue_io_opt(mddev->queue, chunk_size * |
| 5387 | (conf->raid_disks - conf->max_degraded)); | 5526 | (conf->raid_disks - conf->max_degraded)); |
| 5527 | /* | ||
| 5528 | * We can only discard a whole stripe. It doesn't make sense to | ||
| 5529 | * discard a data disk but write the parity disk | ||
| 5530 | */ | ||
| 5531 | stripe = stripe * PAGE_SIZE; | ||
| 5532 | mddev->queue->limits.discard_alignment = stripe; | ||
| 5533 | mddev->queue->limits.discard_granularity = stripe; | ||
| 5534 | /* | ||
| 5535 | * unaligned part of discard request will be ignored, so can't | ||
| 5536 | * guarantee discard_zerors_data | ||
| 5537 | */ | ||
| 5538 | mddev->queue->limits.discard_zeroes_data = 0; | ||
| 5388 | 5539 | ||
| 5389 | rdev_for_each(rdev, mddev) { | 5540 | rdev_for_each(rdev, mddev) { |
| 5390 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5541 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 5391 | rdev->data_offset << 9); | 5542 | rdev->data_offset << 9); |
| 5392 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5543 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 5393 | rdev->new_data_offset << 9); | 5544 | rdev->new_data_offset << 9); |
| 5545 | /* | ||
| 5546 | * discard_zeroes_data is required, otherwise data | ||
| 5547 | * could be lost. Consider a scenario: discard a stripe | ||
| 5548 | * (the stripe could be inconsistent if | ||
| 5549 | * discard_zeroes_data is 0); write one disk of the | ||
| 5550 | * stripe (the stripe could be inconsistent again | ||
| 5551 | * depending on which disks are used to calculate | ||
| 5552 | * parity); the disk is broken; The stripe data of this | ||
| 5553 | * disk is lost. | ||
| 5554 | */ | ||
| 5555 | if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) || | ||
| 5556 | !bdev_get_queue(rdev->bdev)-> | ||
| 5557 | limits.discard_zeroes_data) | ||
| 5558 | discard_supported = false; | ||
| 5394 | } | 5559 | } |
| 5560 | |||
| 5561 | if (discard_supported && | ||
| 5562 | mddev->queue->limits.max_discard_sectors >= stripe && | ||
| 5563 | mddev->queue->limits.discard_granularity >= stripe) | ||
| 5564 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, | ||
| 5565 | mddev->queue); | ||
| 5566 | else | ||
| 5567 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, | ||
| 5568 | mddev->queue); | ||
| 5395 | } | 5569 | } |
| 5396 | 5570 | ||
| 5397 | return 0; | 5571 | return 0; |
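
raid5's gate is stricter than raid1/raid10's: the long comment above spells out why every member must both support discard and guarantee discard_zeroes_data, or a partially rewritten stripe could regenerate bogus data from stale parity. A stand-in predicate capturing the all-members requirement (the real check additionally requires max_discard_sectors and discard_granularity to cover a full stripe):

#include <stdbool.h>
#include <stdio.h>

struct member { bool discard; bool zeroes_data; };

static bool raid5_discard_ok(const struct member *m, int n)
{
        for (int i = 0; i < n; i++)
                if (!m[i].discard || !m[i].zeroes_data)
                        return false;
        return true;
}

int main(void)
{
        struct member disks[3] = {
                { true, true }, { true, true }, { true, false }
        };
        printf("%d\n", raid5_discard_ok(disks, 3)); /* 0: one member lies */
        return 0;
}
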
| @@ -5702,7 +5876,8 @@ static int check_reshape(struct mddev *mddev) | |||
| 5702 | if (!check_stripe_cache(mddev)) | 5876 | if (!check_stripe_cache(mddev)) |
| 5703 | return -ENOSPC; | 5877 | return -ENOSPC; |
| 5704 | 5878 | ||
| 5705 | return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); | 5879 | return resize_stripes(conf, (conf->previous_raid_disks |
| 5880 | + mddev->delta_disks)); | ||
| 5706 | } | 5881 | } |
| 5707 | 5882 | ||
| 5708 | static int raid5_start_reshape(struct mddev *mddev) | 5883 | static int raid5_start_reshape(struct mddev *mddev) |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index a9fc24901eda..18b2c4a8a1fd 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
| @@ -298,6 +298,7 @@ enum r5dev_flags { | |||
| 298 | R5_WantReplace, /* We need to update the replacement, we have read | 298 | R5_WantReplace, /* We need to update the replacement, we have read |
| 299 | * data in, and now is a good time to write it out. | 299 | * data in, and now is a good time to write it out. |
| 300 | */ | 300 | */ |
| 301 | R5_Discard, /* Discard the stripe */ | ||
| 301 | }; | 302 | }; |
| 302 | 303 | ||
| 303 | /* | 304 | /* |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e9ebb472b28b..81e407d9677a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -2952,8 +2952,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
| 2952 | struct btrfs_inode_item *item, | 2952 | struct btrfs_inode_item *item, |
| 2953 | struct inode *inode, int log_inode_only) | 2953 | struct inode *inode, int log_inode_only) |
| 2954 | { | 2954 | { |
| 2955 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2955 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); |
| 2956 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2956 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); |
| 2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | 2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); |
| 2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | 2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); |
| 2959 | 2959 | ||
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 11efd830b5f5..9fbea87fdb6e 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c | |||
| @@ -45,7 +45,7 @@ static void posix_acl_fix_xattr_userns( | |||
| 45 | break; | 45 | break; |
| 46 | case ACL_GROUP: | 46 | case ACL_GROUP: |
| 47 | gid = make_kgid(from, le32_to_cpu(entry->e_id)); | 47 | gid = make_kgid(from, le32_to_cpu(entry->e_id)); |
| 48 | entry->e_id = cpu_to_le32(from_kuid(to, uid)); | 48 | entry->e_id = cpu_to_le32(from_kgid(to, gid)); |
| 49 | break; | 49 | break; |
| 50 | default: | 50 | default: |
| 51 | break; | 51 | break; |
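
The one-line xattr_acl.c fix is a classic copy-paste bug: ACL_GROUP entries were being written back with from_kuid applied to the uid computed for the ACL_USER branch. A toy identity-mapped model of the corrected round trip (the kernel's make_kgid/from_kgid also take a struct user_namespace and can fail the mapping, both elided here):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t val; } kgid_t;

static kgid_t   make_kgid(uint32_t id) { return (kgid_t){ id }; }
static uint32_t from_kgid(kgid_t gid)  { return gid.val; }

int main(void)
{
        uint32_t e_id = 1000;           /* ACL_GROUP entry's on-disk id */
        kgid_t gid = make_kgid(e_id);   /* into kernel-internal form */
        /* the bug: this slot used to be filled with from_kuid(to, uid),
         * i.e. the stale result of the ACL_USER branch */
        printf("e_id = %u\n", from_kgid(gid));  /* e_id = 1000 */
        return 0;
}
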
diff --git a/include/linux/spi/Kbuild b/include/linux/spi/Kbuild index d375a082986e..e69de29bb2d1 100644 --- a/include/linux/spi/Kbuild +++ b/include/linux/spi/Kbuild | |||
| @@ -1 +0,0 @@ | |||
| 1 | header-y += spidev.h | ||
diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild index aafaa5aa54d4..0cc747eff165 100644 --- a/include/uapi/linux/spi/Kbuild +++ b/include/uapi/linux/spi/Kbuild | |||
| @@ -1 +1,2 @@ | |||
| 1 | # UAPI Header export list | 1 | # UAPI Header export list |
| 2 | header-y += spidev.h | ||
diff --git a/include/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index 52d9ed01855f..52d9ed01855f 100644 --- a/include/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h | |||
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 9d49ee6d7219..ba033f09196e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c | |||
| @@ -591,7 +591,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) | |||
| 591 | atomic_read(&sk->sk_refcnt), | 591 | atomic_read(&sk->sk_refcnt), |
| 592 | sk_rmem_alloc_get(sk), | 592 | sk_rmem_alloc_get(sk), |
| 593 | sk_wmem_alloc_get(sk), | 593 | sk_wmem_alloc_get(sk), |
| 594 | sock_i_uid(sk), | 594 | from_kuid(seq_user_ns(seq), sock_i_uid(sk)), |
| 595 | sock_i_ino(sk), | 595 | sock_i_ino(sk), |
| 596 | &src_baswapped, | 596 | &src_baswapped, |
| 597 | &dst_baswapped, | 597 | &dst_baswapped, |
