aboutsummaryrefslogtreecommitdiffstats
path: root/lib/errseq.c
diff options
context:
space:
mode:
authorJeff Layton <jlayton@redhat.com>2017-07-06 07:02:24 -0400
committerJeff Layton <jlayton@redhat.com>2017-07-06 07:02:24 -0400
commit84cbadadc6eafc4798513773a2c8fce37dcd2fb8 (patch)
tree0cf2168d471693e85cc39b291df98164338cb2f5 /lib/errseq.c
parent5e8fcc1a0ffa0fb794b3c0efa2c3c7612a771c36 (diff)
lib: add errseq_t type and infrastructure for handling it
An errseq_t is a way of recording errors in one place, and allowing any number of "subscribers" to tell whether an error has been set again since a previous time. It's implemented as an unsigned 32-bit value that is managed with atomic operations. The low order bits are designated to hold an error code (max size of MAX_ERRNO). The upper bits are used as a counter. The API works with consumers sampling an errseq_t value at a particular point in time. Later, that value can be used to tell whether new errors have been set since that time. Note that there is a 1 in 512k risk of collisions here if new errors are being recorded frequently, since we have so few bits to use as a counter. To mitigate this, one bit is used as a flag to tell whether the value has been sampled since a new value was recorded. That allows us to avoid bumping the counter if no one has sampled it since it was last bumped. Later patches will build on this infrastructure to change how writeback errors are tracked in the kernel. Signed-off-by: Jeff Layton <jlayton@redhat.com> Reviewed-by: NeilBrown <neilb@suse.com> Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'lib/errseq.c')
-rw-r--r--lib/errseq.c208
1 files changed, 208 insertions, 0 deletions
diff --git a/lib/errseq.c b/lib/errseq.c
new file mode 100644
index 000000000000..841fa24e6e00
--- /dev/null
+++ b/lib/errseq.c
@@ -0,0 +1,208 @@
1#include <linux/err.h>
2#include <linux/bug.h>
3#include <linux/atomic.h>
4#include <linux/errseq.h>
5
6/*
7 * An errseq_t is a way of recording errors in one place, and allowing any
8 * number of "subscribers" to tell whether it has changed since a previous
9 * point where it was sampled.
10 *
11 * It's implemented as an unsigned 32-bit value. The low order bits are
12 * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits
13 * are used as a counter. This is done with atomics instead of locking so that
14 * these functions can be called from any context.
15 *
16 * The general idea is for consumers to sample an errseq_t value. That value
17 * can later be used to tell whether any new errors have occurred since that
18 * sampling was done.
19 *
20 * Note that there is a risk of collisions if new errors are being recorded
21 * frequently, since we have so few bits to use as a counter.
22 *
23 * To mitigate this, one bit is used as a flag to tell whether the value has
24 * been sampled since a new value was recorded. That allows us to avoid bumping
25 * the counter if no one has sampled it since the last time an error was
26 * recorded.
27 *
28 * A new errseq_t should always be zeroed out. An errseq_t value of all zeroes
29 * is the special (but common) case where there has never been an error. An all
30 * zero value thus serves as the "epoch" if one wishes to know whether there
31 * has ever been an error set since it was first initialized.
32 */
33
/*
 * Bit layout of an errseq_t, from least to most significant:
 *
 *   [0, ERRSEQ_SHIFT)   error code, stored as a positive value <= MAX_ERRNO
 *   ERRSEQ_SHIFT        "seen" flag
 *   above that          sequence counter
 */

/* Number of low bits reserved for the error code */
#define ERRSEQ_SHIFT		ilog2(MAX_ERRNO + 1)

/* Flag bit: set once the current value has been observed by a sampler */
#define ERRSEQ_SEEN		(1 << ERRSEQ_SHIFT)

/* One counter step: the bit immediately above the "seen" flag */
#define ERRSEQ_CTR_INC		(ERRSEQ_SEEN << 1)
42
43/**
44 * __errseq_set - set a errseq_t for later reporting
45 * @eseq: errseq_t field that should be set
46 * @err: error to set
47 *
48 * This function sets the error in *eseq, and increments the sequence counter
49 * if the last sequence was sampled at some point in the past.
50 *
51 * Any error set will always overwrite an existing error.
52 *
53 * Most callers will want to use the errseq_set inline wrapper to efficiently
54 * handle the common case where err is 0.
55 *
56 * We do return an errseq_t here, primarily for debugging purposes. The return
57 * value should not be used as a previously sampled value in later calls as it
58 * will not have the SEEN flag set.
59 */
60errseq_t __errseq_set(errseq_t *eseq, int err)
61{
62 errseq_t cur, old;
63
64 /* MAX_ERRNO must be able to serve as a mask */
65 BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1);
66
67 /*
68 * Ensure the error code actually fits where we want it to go. If it
69 * doesn't then just throw a warning and don't record anything. We
70 * also don't accept zero here as that would effectively clear a
71 * previous error.
72 */
73 old = READ_ONCE(*eseq);
74
75 if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO),
76 "err = %d\n", err))
77 return old;
78
79 for (;;) {
80 errseq_t new;
81
82 /* Clear out error bits and set new error */
83 new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err;
84
85 /* Only increment if someone has looked at it */
86 if (old & ERRSEQ_SEEN)
87 new += ERRSEQ_CTR_INC;
88
89 /* If there would be no change, then call it done */
90 if (new == old) {
91 cur = new;
92 break;
93 }
94
95 /* Try to swap the new value into place */
96 cur = cmpxchg(eseq, old, new);
97
98 /*
99 * Call it success if we did the swap or someone else beat us
100 * to it for the same value.
101 */
102 if (likely(cur == old || cur == new))
103 break;
104
105 /* Raced with an update, try again */
106 old = cur;
107 }
108 return cur;
109}
110EXPORT_SYMBOL(__errseq_set);
111
112/**
113 * errseq_sample - grab current errseq_t value
114 * @eseq: pointer to errseq_t to be sampled
115 *
116 * This function allows callers to sample an errseq_t value, marking it as
117 * "seen" if required.
118 */
119errseq_t errseq_sample(errseq_t *eseq)
120{
121 errseq_t old = READ_ONCE(*eseq);
122 errseq_t new = old;
123
124 /*
125 * For the common case of no errors ever having been set, we can skip
126 * marking the SEEN bit. Once an error has been set, the value will
127 * never go back to zero.
128 */
129 if (old != 0) {
130 new |= ERRSEQ_SEEN;
131 if (old != new)
132 cmpxchg(eseq, old, new);
133 }
134 return new;
135}
136EXPORT_SYMBOL(errseq_sample);
137
138/**
139 * errseq_check - has an error occurred since a particular sample point?
140 * @eseq: pointer to errseq_t value to be checked
141 * @since: previously-sampled errseq_t from which to check
142 *
143 * Grab the value that eseq points to, and see if it has changed "since"
144 * the given value was sampled. The "since" value is not advanced, so there
145 * is no need to mark the value as seen.
146 *
147 * Returns the latest error set in the errseq_t or 0 if it hasn't changed.
148 */
149int errseq_check(errseq_t *eseq, errseq_t since)
150{
151 errseq_t cur = READ_ONCE(*eseq);
152
153 if (likely(cur == since))
154 return 0;
155 return -(cur & MAX_ERRNO);
156}
157EXPORT_SYMBOL(errseq_check);
158
159/**
160 * errseq_check_and_advance - check an errseq_t and advance to current value
161 * @eseq: pointer to value being checked and reported
162 * @since: pointer to previously-sampled errseq_t to check against and advance
163 *
164 * Grab the eseq value, and see whether it matches the value that "since"
165 * points to. If it does, then just return 0.
166 *
167 * If it doesn't, then the value has changed. Set the "seen" flag, and try to
168 * swap it into place as the new eseq value. Then, set that value as the new
169 * "since" value, and return whatever the error portion is set to.
170 *
171 * Note that no locking is provided here for concurrent updates to the "since"
172 * value. The caller must provide that if necessary. Because of this, callers
173 * may want to do a lockless errseq_check before taking the lock and calling
174 * this.
175 */
176int errseq_check_and_advance(errseq_t *eseq, errseq_t *since)
177{
178 int err = 0;
179 errseq_t old, new;
180
181 /*
182 * Most callers will want to use the inline wrapper to check this,
183 * so that the common case of no error is handled without needing
184 * to take the lock that protects the "since" value.
185 */
186 old = READ_ONCE(*eseq);
187 if (old != *since) {
188 /*
189 * Set the flag and try to swap it into place if it has
190 * changed.
191 *
192 * We don't care about the outcome of the swap here. If the
193 * swap doesn't occur, then it has either been updated by a
194 * writer who is altering the value in some way (updating
195 * counter or resetting the error), or another reader who is
196 * just setting the "seen" flag. Either outcome is OK, and we
197 * can advance "since" and return an error based on what we
198 * have.
199 */
200 new = old | ERRSEQ_SEEN;
201 if (new != old)
202 cmpxchg(eseq, old, new);
203 *since = new;
204 err = -(new & MAX_ERRNO);
205 }
206 return err;
207}
208EXPORT_SYMBOL(errseq_check_and_advance);