diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2019-04-16 11:22:00 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2019-04-16 13:01:57 -0400 |
commit | 4860a05d2475d6f50de79f168beb893cc0066a6e (patch) | |
tree | 197f1c3f82a1efc399c8f4048ac3c8480ae90b0c /fs/xfs/scrub/health.c | |
parent | 160b5a784525e8a44902873f1938e1f4d77fe112 (diff) |
xfs: scrub/repair should update filesystem metadata health
Now that we have the ability to track sick metadata in-core, make scrub
and repair update those health assessments after doing work.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Diffstat (limited to 'fs/xfs/scrub/health.c')
-rw-r--r-- | fs/xfs/scrub/health.c | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c new file mode 100644 index 000000000000..df99e0066c54 --- /dev/null +++ b/fs/xfs/scrub/health.c | |||
@@ -0,0 +1,176 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0+ | ||
2 | /* | ||
3 | * Copyright (C) 2019 Oracle. All Rights Reserved. | ||
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | ||
5 | */ | ||
6 | #include "xfs.h" | ||
7 | #include "xfs_fs.h" | ||
8 | #include "xfs_shared.h" | ||
9 | #include "xfs_format.h" | ||
10 | #include "xfs_trans_resv.h" | ||
11 | #include "xfs_mount.h" | ||
12 | #include "xfs_defer.h" | ||
13 | #include "xfs_btree.h" | ||
14 | #include "xfs_bit.h" | ||
15 | #include "xfs_log_format.h" | ||
16 | #include "xfs_trans.h" | ||
17 | #include "xfs_sb.h" | ||
18 | #include "xfs_inode.h" | ||
19 | #include "xfs_health.h" | ||
20 | #include "scrub/scrub.h" | ||
21 | #include "scrub/health.h" | ||
22 | |||
23 | /* | ||
24 | * Scrub and In-Core Filesystem Health Assessments | ||
25 | * =============================================== | ||
26 | * | ||
27 | * Online scrub and repair have the time and the ability to perform stronger | ||
28 | * checks than we can do from the metadata verifiers, because they can | ||
29 | * cross-reference records between data structures. Therefore, scrub is in a | ||
30 | * good position to update the online filesystem health assessments to reflect | ||
31 | * the good/bad state of the data structure. | ||
32 | * | ||
33 | * We therefore extend scrub in the following ways to achieve this: | ||
34 | * | ||
35 | * 1. Create a "sick_mask" field in the scrub context. When we're setting up a | ||
36 | * scrub call, set this to the default XFS_SICK_* flag(s) for the selected | ||
37 | * scrub type (call it A). Scrub and repair functions can override the default | ||
38 | * sick_mask value if they choose. | ||
39 | * | ||
40 | * 2. If the scrubber returns a runtime error code, we exit making no changes | ||
41 | * to the incore sick state. | ||
42 | * | ||
43 | * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore | ||
44 | * sick flags before exiting. | ||
45 | * | ||
46 | * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore | ||
47 | * sick flags. If the user didn't want to repair then we exit, leaving the | ||
48 | * metadata structure unfixed and the sick flag set. | ||
49 | * | ||
50 | * 5. Now we know that A is corrupt and the user wants to repair, so run the | ||
51 | * repairer. If the repairer returns an error code, we exit with that error | ||
52 | * code, having made no further changes to the incore sick state. | ||
53 | * | ||
54 | * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean, | ||
55 | * use sick_mask to clear the incore sick flags. This should have the effect | ||
56 | * that A is no longer marked sick. | ||
57 | * | ||
58 | * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and | ||
59 | * use sick_mask to set the incore sick flags. This should have no externally | ||
60 | * visible effect since we already set them in step (4). | ||
61 | * | ||
62 | * There are some complications to this story, however. For certain types of | ||
63 | * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild | ||
64 | * both structures at the same time. The following principles apply to this | ||
65 | * type of repair strategy: | ||
66 | * | ||
67 | * 8. Any repair function that rebuilds multiple structures should update | ||
68 | * sick_mask to reflect whatever other structures are rebuilt, and | |
69 | * verify that all the rebuilt structures can pass a scrub check. The outcomes | ||
70 | * of 5-7 still apply, but with a sick_mask that covers everything being | ||
71 | * rebuilt. | ||
72 | */ | ||
73 | |||
74 | /* Map our scrub type to a sick mask and a set of health update functions. */ | ||
75 | |||
/*
 * Which in-core health record a scrub type reports to.  xchk_update_health()
 * switches on this value to pick the per-AG, per-inode, whole-filesystem or
 * realtime mark_sick/mark_healthy helpers.
 *
 * Numbering starts at 1, so a zero-initialized value does not match any
 * group.
 */
enum xchk_health_group {
	XHG_FS	= 1,	/* xfs_fs_mark_{sick,healthy} */
	XHG_RT	= 2,	/* xfs_rt_mark_{sick,healthy} */
	XHG_AG	= 3,	/* xfs_ag_mark_{sick,healthy} */
	XHG_INO	= 4,	/* xfs_inode_mark_{sick,healthy} */
};
82 | |||
83 | struct xchk_health_map { | ||
84 | enum xchk_health_group group; | ||
85 | unsigned int sick_mask; | ||
86 | }; | ||
87 | |||
88 | static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { | ||
89 | [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB }, | ||
90 | [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF }, | ||
91 | [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL }, | ||
92 | [XFS_SCRUB_TYPE_AGI] = { XHG_AG, XFS_SICK_AG_AGI }, | ||
93 | [XFS_SCRUB_TYPE_BNOBT] = { XHG_AG, XFS_SICK_AG_BNOBT }, | ||
94 | [XFS_SCRUB_TYPE_CNTBT] = { XHG_AG, XFS_SICK_AG_CNTBT }, | ||
95 | [XFS_SCRUB_TYPE_INOBT] = { XHG_AG, XFS_SICK_AG_INOBT }, | ||
96 | [XFS_SCRUB_TYPE_FINOBT] = { XHG_AG, XFS_SICK_AG_FINOBT }, | ||
97 | [XFS_SCRUB_TYPE_RMAPBT] = { XHG_AG, XFS_SICK_AG_RMAPBT }, | ||
98 | [XFS_SCRUB_TYPE_REFCNTBT] = { XHG_AG, XFS_SICK_AG_REFCNTBT }, | ||
99 | [XFS_SCRUB_TYPE_INODE] = { XHG_INO, XFS_SICK_INO_CORE }, | ||
100 | [XFS_SCRUB_TYPE_BMBTD] = { XHG_INO, XFS_SICK_INO_BMBTD }, | ||
101 | [XFS_SCRUB_TYPE_BMBTA] = { XHG_INO, XFS_SICK_INO_BMBTA }, | ||
102 | [XFS_SCRUB_TYPE_BMBTC] = { XHG_INO, XFS_SICK_INO_BMBTC }, | ||
103 | [XFS_SCRUB_TYPE_DIR] = { XHG_INO, XFS_SICK_INO_DIR }, | ||
104 | [XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR }, | ||
105 | [XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK }, | ||
106 | [XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT }, | ||
107 | [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP }, | ||
108 | [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY }, | ||
109 | [XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA }, | ||
110 | [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA }, | ||
111 | [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA }, | ||
112 | }; | ||
113 | |||
114 | /* Return the health status mask for this scrub type. */ | ||
115 | unsigned int | ||
116 | xchk_health_mask_for_scrub_type( | ||
117 | __u32 scrub_type) | ||
118 | { | ||
119 | return type_to_health_flag[scrub_type].sick_mask; | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * Update filesystem health assessments based on what we found and did. | ||
124 | * | ||
125 | * If the scrubber finds errors, we mark sick whatever's mentioned in | ||
126 | * sick_mask, no matter whether this is a first scan or an | ||
127 | * evaluation of repair effectiveness. | ||
128 | * | ||
129 | * Otherwise, no direct corruption was found, so mark whatever's in | ||
130 | * sick_mask as healthy. | ||
131 | */ | ||
132 | void | ||
133 | xchk_update_health( | ||
134 | struct xfs_scrub *sc) | ||
135 | { | ||
136 | struct xfs_perag *pag; | ||
137 | bool bad; | ||
138 | |||
139 | if (!sc->sick_mask) | ||
140 | return; | ||
141 | |||
142 | bad = (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT); | ||
143 | switch (type_to_health_flag[sc->sm->sm_type].group) { | ||
144 | case XHG_AG: | ||
145 | pag = xfs_perag_get(sc->mp, sc->sm->sm_agno); | ||
146 | if (bad) | ||
147 | xfs_ag_mark_sick(pag, sc->sick_mask); | ||
148 | else | ||
149 | xfs_ag_mark_healthy(pag, sc->sick_mask); | ||
150 | xfs_perag_put(pag); | ||
151 | break; | ||
152 | case XHG_INO: | ||
153 | if (!sc->ip) | ||
154 | return; | ||
155 | if (bad) | ||
156 | xfs_inode_mark_sick(sc->ip, sc->sick_mask); | ||
157 | else | ||
158 | xfs_inode_mark_healthy(sc->ip, sc->sick_mask); | ||
159 | break; | ||
160 | case XHG_FS: | ||
161 | if (bad) | ||
162 | xfs_fs_mark_sick(sc->mp, sc->sick_mask); | ||
163 | else | ||
164 | xfs_fs_mark_healthy(sc->mp, sc->sick_mask); | ||
165 | break; | ||
166 | case XHG_RT: | ||
167 | if (bad) | ||
168 | xfs_rt_mark_sick(sc->mp, sc->sick_mask); | ||
169 | else | ||
170 | xfs_rt_mark_healthy(sc->mp, sc->sick_mask); | ||
171 | break; | ||
172 | default: | ||
173 | ASSERT(0); | ||
174 | break; | ||
175 | } | ||
176 | } | ||