lib: add errseq_t type and infrastructure for handling it

An errseq_t is a way of recording errors in one place, and allowing any number of "subscribers" to tell whether an error has been set again since a previous time. It's implemented as an unsigned 32-bit value that is managed with atomic operations. The low order bits are designated to hold an error code (max size of MAX_ERRNO). The upper bits are used as a counter. The API works with consumers sampling an errseq_t value at a particular point in time. Later, that value can be used to tell whether new errors have been set since that time. Note that there is a 1 in 512k risk of collisions here if new errors are being recorded frequently, since we have so few bits to use as a counter. To mitigate this, one bit is used as a flag to tell whether the value has been sampled since a new value was recorded. That allows us to avoid bumping the counter if no one has sampled it since it was last bumped. Later patches will build on this infrastructure to change how writeback errors are tracked in the kernel. Signed-off-by: Jeff Layton <jlayton@redhat.com> Reviewed-by: NeilBrown <neilb@suse.com> Reviewed-by: Jan Kara <jack@suse.cz>
author: Jeff Layton <jlayton@redhat.com> 2017-07-06 07:02:24 -0400
committer: Jeff Layton <jlayton@redhat.com> 2017-07-06 07:02:24 -0400
commit: 84cbadadc6eafc4798513773a2c8fce37dcd2fb8 (patch)
tree: 0cf2168d471693e85cc39b291df98164338cb2f5 /lib/errseq.c
parent: 5e8fcc1a0ffa0fb794b3c0efa2c3c7612a771c36 (diff)
1 files changed, 208 insertions, 0 deletions
diff --git a/lib/errseq.c b/lib/errseq.c
new file mode 100644
index 000000000000..841fa24e6e00
--- /dev/null
+++ b/lib/errseq.c
@@ -0,0 +1,208 @@
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/atomic.h>
+#include <linux/errseq.h>
+/*
+ * An errseq_t is a way of recording errors in one place, and allowing any
+ * number of "subscribers" to tell whether it has changed since a previous
+ * point where it was sampled.
+ *
+ * It's implemented as an unsigned 32-bit value. The low order bits are
+ * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits
+ * are used as a counter. This is done with atomics instead of locking so that
+ * these functions can be called from any context.
+ *
+ * The general idea is for consumers to sample an errseq_t value. That value
+ * can later be used to tell whether any new errors have occurred since that
+ * sampling was done.
+ *
+ * Note that there is a risk of collisions if new errors are being recorded
+ * frequently, since we have so few bits to use as a counter.
+ *
+ * To mitigate this, one bit is used as a flag to tell whether the value has
+ * been sampled since a new value was recorded. That allows us to avoid bumping
+ * the counter if no one has sampled it since the last time an error was
+ * recorded.
+ *
+ * A new errseq_t should always be zeroed out.  An errseq_t value of all zeroes
+ * is the special (but common) case where there has never been an error. An all
+ * zero value thus serves as the "epoch" if one wishes to know whether there
+ * has ever been an error set since it was first initialized.
+ */
+/*
+ * Number of low-order bits designated for the error code (max of MAX_ERRNO).
+ * Everything at or above this bit position is bookkeeping, not error code.
+ */
+#define ERRSEQ_SHIFT            ilog2(MAX_ERRNO + 1)
+/*
+ * Flag bit sitting directly above the error code. It indicates whether the
+ * current value has been seen (sampled); recording a new error clears it.
+ */
+#define ERRSEQ_SEEN             (1 << ERRSEQ_SHIFT)
+/* The lowest bit of the counter, i.e. the amount added to bump it by one */
+#define ERRSEQ_CTR_INC          (1 << (ERRSEQ_SHIFT + 1))
+/**
+ * __errseq_set - set an errseq_t for later reporting
+ * @eseq: errseq_t field that should be set
+ * @err: error to set; a negative errno in the range -MAX_ERRNO..-1
+ *
+ * This function sets the error in *eseq, and increments the sequence counter
+ * if the last sequence was sampled at some point in the past.
+ *
+ * Any error set will always overwrite an existing error.
+ *
+ * If @err is zero, or its negation does not fit in the error field, a warning
+ * is emitted, *eseq is left unmodified and the value read from it is returned.
+ *
+ * Most callers will want to use the errseq_set inline wrapper to efficiently
+ * handle the common case where err is 0.
+ *
+ * We do return an errseq_t here, primarily for debugging purposes. The return
+ * value should not be used as a previously sampled value in later calls: this
+ * function never marks it as SEEN, and when the swap succeeds it is the value
+ * that was replaced rather than the one that was stored.
+ */
+errseq_t __errseq_set(errseq_t *eseq, int err)
+{
+        errseq_t cur, old;
+        /* MAX_ERRNO must be able to serve as a mask */
+        BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1);
+        /*
+         * Ensure the error code actually fits where we want it to go. If it
+         * doesn't then just throw a warning and don't record anything. We
+         * also don't accept zero here as that would effectively clear a
+         * previous error.
+         *
+         * The current value is read before the check so that it can be
+         * handed back unchanged when the error code is rejected.
+         */
+        old = READ_ONCE(*eseq);
+        if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO),
+                                "err = %d\n", err))
+                return old;
+        for (;;) {
+                errseq_t new;
+                /* Clear out the error bits and the SEEN flag, set new error */
+                new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err;
+                /* Only increment the counter if someone has looked at it */
+                if (old & ERRSEQ_SEEN)
+                        new += ERRSEQ_CTR_INC;
+                /* If there would be no change, then call it done */
+                if (new == old) {
+                        cur = new;
+                        break;
+                }
+                /* Try to swap the new value into place */
+                cur = cmpxchg(eseq, old, new);
+                /*
+                 * Call it success if we did the swap or someone else beat us
+                 * to it for the same value.
+                 */
+                if (likely(cur == old || cur == new))
+                        break;
+                /* Raced with an update, retry from the value we just saw */
+                old = cur;
+        }
+        return cur;
+}
+EXPORT_SYMBOL(__errseq_set);
+/**
+ * errseq_sample - grab current errseq_t value
+ * @eseq: pointer to errseq_t to be sampled
+ *
+ * This function allows callers to sample an errseq_t value, marking it as
+ * "seen" if required.
+ *
+ * Returns the value read from *eseq, with the SEEN flag set unless that
+ * value was zero.
+ */
+errseq_t errseq_sample(errseq_t *eseq)
+{
+        errseq_t old = READ_ONCE(*eseq);
+        errseq_t new = old;
+        /*
+         * For the common case of no errors ever having been set, we can skip
+         * marking the SEEN bit. Once an error has been set, the value will
+         * never go back to zero.
+         *
+         * The result of the cmpxchg is not checked: the returned sample
+         * carries the SEEN flag whether or not the swap took place.
+         */
+        if (old != 0) {
+                new |= ERRSEQ_SEEN;
+                if (old != new)
+                        cmpxchg(eseq, old, new);
+        }
+        return new;
+}
+EXPORT_SYMBOL(errseq_sample);
+/**
+ * errseq_check - has an error occurred since a particular sample point?
+ * @eseq: pointer to errseq_t value to be checked
+ * @since: previously-sampled errseq_t from which to check
+ *
+ * Grab the value that eseq points to, and see if it has changed "since"
+ * the given value was sampled. The "since" value is not advanced, so there
+ * is no need to mark the value as seen.
+ *
+ * The comparison is made on the whole value, so a difference in the error
+ * code, the SEEN flag or the counter all count as a change.
+ *
+ * Returns 0 if the current value equals @since, otherwise the error field of
+ * the current value, negated.
+ */
+int errseq_check(errseq_t *eseq, errseq_t since)
+{
+        errseq_t cur = READ_ONCE(*eseq);
+        if (likely(cur == since))
+                return 0;
+        return -(cur & MAX_ERRNO);
+}
+EXPORT_SYMBOL(errseq_check);
+/**
+ * errseq_check_and_advance - check an errseq_t and advance to current value
+ * @eseq: pointer to value being checked and reported
+ * @since: pointer to previously-sampled errseq_t to check against and advance
+ *
+ * Grab the eseq value, and see whether it matches the value that "since"
+ * points to. If it does, then just return 0.
+ *
+ * If it doesn't, then the value has changed. Set the "seen" flag, and try to
+ * swap it into place as the new eseq value. Then, set that value as the new
+ * "since" value, and return whatever the error portion is set to.
+ *
+ * Note that no locking is provided here for concurrent updates to the "since"
+ * value. The caller must provide that if necessary. Because of this, callers
+ * may want to do a lockless errseq_check before taking the lock and calling
+ * this.
+ *
+ * Returns 0 if the current value matches *since (which is then left alone),
+ * otherwise the error field of the current value, negated.
+ */
+int errseq_check_and_advance(errseq_t *eseq, errseq_t *since)
+{
+        int err = 0;
+        errseq_t old, new;
+        /*
+         * Most callers will want to use the inline wrapper to check this,
+         * so that the common case of no error is handled without needing
+         * to take the lock that protects the "since" value.
+         */
+        old = READ_ONCE(*eseq);
+        if (old != *since) {
+                /*
+                 * Set the flag and try to swap it into place if it has
+                 * changed.
+                 *
+                 * We don't care about the outcome of the swap here. If the
+                 * swap doesn't occur, then it has either been updated by a
+                 * writer who is altering the value in some way (updating
+                 * counter or resetting the error), or another reader who is
+                 * just setting the "seen" flag. Either outcome is OK, and we
+                 * can advance "since" and return an error based on what we
+                 * have (the value read above, with the flag set).
+                 */
+                new = old | ERRSEQ_SEEN;
+                if (new != old)
+                        cmpxchg(eseq, old, new);
+                *since = new;
+                err = -(new & MAX_ERRNO);
+        }
+        return err;
+}
+EXPORT_SYMBOL(errseq_check_and_advance);
author	Jeff Layton <jlayton@redhat.com>	2017-07-06 07:02:24 -0400
committer	Jeff Layton <jlayton@redhat.com>	2017-07-06 07:02:24 -0400
commit	84cbadadc6eafc4798513773a2c8fce37dcd2fb8 (patch)
tree	0cf2168d471693e85cc39b291df98164338cb2f5 /lib/errseq.c
parent	5e8fcc1a0ffa0fb794b3c0efa2c3c7612a771c36 (diff)

diff --git a/lib/errseq.c b/lib/errseq.c new file mode 100644 index 000000000000..841fa24e6e00 --- /dev/null +++ b/lib/errseq.c
@@ -0,0 +1,208 @@
	1	#include <linux/err.h>
	2	#include <linux/bug.h>
	3	#include <linux/atomic.h>
	4	#include <linux/errseq.h>
	5
	6	/*
	7	* An errseq_t is a way of recording errors in one place, and allowing any
	8	* number of "subscribers" to tell whether it has changed since a previous
	9	* point where it was sampled.
	10	*
	11	* It's implemented as an unsigned 32-bit value. The low order bits are
	12	* designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits
	13	* are used as a counter. This is done with atomics instead of locking so that
	14	* these functions can be called from any context.
	15	*
	16	* The general idea is for consumers to sample an errseq_t value. That value
	17	* can later be used to tell whether any new errors have occurred since that
	18	* sampling was done.
	19	*
	20	* Note that there is a risk of collisions if new errors are being recorded
	21	* frequently, since we have so few bits to use as a counter.
	22	*
	23	* To mitigate this, one bit is used as a flag to tell whether the value has
	24	* been sampled since a new value was recorded. That allows us to avoid bumping
	25	* the counter if no one has sampled it since the last time an error was
	26	* recorded.
	27	*
	28	* A new errseq_t should always be zeroed out. An errseq_t value of all zeroes
	29	* is the special (but common) case where there has never been an error. An all
	30	* zero value thus serves as the "epoch" if one wishes to know whether there
	31	* has ever been an error set since it was first initialized.
	32	*/
	33
	34	/* The low bits are designated for error code (max of MAX_ERRNO) */
	35	#define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1)
	36
	37	/* This bit is used as a flag to indicate whether the value has been seen */
	38	#define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT)
	39
	40	/* The lowest bit of the counter */
	41	#define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1))
	42
	43	/**
	44	* __errseq_set - set an errseq_t for later reporting
	45	* @eseq: errseq_t field that should be set
	46	* @err: error to set
	47	*
	48	* This function sets the error in *eseq, and increments the sequence counter
	49	* if the last sequence was sampled at some point in the past.
	50	*
	51	* Any error set will always overwrite an existing error.
	52	*
	53	* Most callers will want to use the errseq_set inline wrapper to efficiently
	54	* handle the common case where err is 0.
	55	*
	56	* We do return an errseq_t here, primarily for debugging purposes. The return
	57	* value should not be used as a previously sampled value in later calls as it
	58	* will not have the SEEN flag set.
	59	*/
	60	errseq_t __errseq_set(errseq_t *eseq, int err)
	61	{
	62	errseq_t cur, old;
	63
	64	/* MAX_ERRNO must be able to serve as a mask */
	65	BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1);
	66
	67	/*
	68	* Ensure the error code actually fits where we want it to go. If it
	69	* doesn't then just throw a warning and don't record anything. We
	70	* also don't accept zero here as that would effectively clear a
	71	* previous error.
	72	*/
	73	old = READ_ONCE(*eseq);
	74
	75	if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO),
	76	"err = %d\n", err))
	77	return old;
	78
	79	for (;;) {
	80	errseq_t new;
	81
	82	/* Clear out error bits and set new error */
	83	new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err;
	84
	85	/* Only increment if someone has looked at it */
	86	if (old & ERRSEQ_SEEN)
	87	new += ERRSEQ_CTR_INC;
	88
	89	/* If there would be no change, then call it done */
	90	if (new == old) {
	91	cur = new;
	92	break;
	93	}
	94
	95	/* Try to swap the new value into place */
	96	cur = cmpxchg(eseq, old, new);
	97
	98	/*
	99	* Call it success if we did the swap or someone else beat us
	100	* to it for the same value.
	101	*/
	102	if (likely(cur == old || cur == new))
	103	break;
	104
	105	/* Raced with an update, try again */
	106	old = cur;
	107	}
	108	return cur;
	109	}
	110	EXPORT_SYMBOL(__errseq_set);
	111
	112	/**
	113	* errseq_sample - grab current errseq_t value
	114	* @eseq: pointer to errseq_t to be sampled
	115	*
	116	* This function allows callers to sample an errseq_t value, marking it as
	117	* "seen" if required.
	118	*/
	119	errseq_t errseq_sample(errseq_t *eseq)
	120	{
	121	errseq_t old = READ_ONCE(*eseq);
	122	errseq_t new = old;
	123
	124	/*
	125	* For the common case of no errors ever having been set, we can skip
	126	* marking the SEEN bit. Once an error has been set, the value will
	127	* never go back to zero.
	128	*/
	129	if (old != 0) {
	130	new |= ERRSEQ_SEEN;
	131	if (old != new)
	132	cmpxchg(eseq, old, new);
	133	}
	134	return new;
	135	}
	136	EXPORT_SYMBOL(errseq_sample);
	137
	138	/**
	139	* errseq_check - has an error occurred since a particular sample point?
	140	* @eseq: pointer to errseq_t value to be checked
	141	* @since: previously-sampled errseq_t from which to check
	142	*
	143	* Grab the value that eseq points to, and see if it has changed "since"
	144	* the given value was sampled. The "since" value is not advanced, so there
	145	* is no need to mark the value as seen.
	146	*
	147	* Returns the latest error set in the errseq_t or 0 if it hasn't changed.
	148	*/
	149	int errseq_check(errseq_t *eseq, errseq_t since)
	150	{
	151	errseq_t cur = READ_ONCE(*eseq);
	152
	153	if (likely(cur == since))
	154	return 0;
	155	return -(cur & MAX_ERRNO);
	156	}
	157	EXPORT_SYMBOL(errseq_check);
	158
	159	/**
	160	* errseq_check_and_advance - check an errseq_t and advance to current value
	161	* @eseq: pointer to value being checked and reported
	162	* @since: pointer to previously-sampled errseq_t to check against and advance
	163	*
	164	* Grab the eseq value, and see whether it matches the value that "since"
	165	* points to. If it does, then just return 0.
	166	*
	167	* If it doesn't, then the value has changed. Set the "seen" flag, and try to
	168	* swap it into place as the new eseq value. Then, set that value as the new
	169	* "since" value, and return whatever the error portion is set to.
	170	*
	171	* Note that no locking is provided here for concurrent updates to the "since"
	172	* value. The caller must provide that if necessary. Because of this, callers
	173	* may want to do a lockless errseq_check before taking the lock and calling
	174	* this.
	175	*/
	176	int errseq_check_and_advance(errseq_t *eseq, errseq_t *since)
	177	{
	178	int err = 0;
	179	errseq_t old, new;
	180
	181	/*
	182	* Most callers will want to use the inline wrapper to check this,
	183	* so that the common case of no error is handled without needing
	184	* to take the lock that protects the "since" value.
	185	*/
	186	old = READ_ONCE(*eseq);
	187	if (old != *since) {
	188	/*
	189	* Set the flag and try to swap it into place if it has
	190	* changed.
	191	*
	192	* We don't care about the outcome of the swap here. If the
	193	* swap doesn't occur, then it has either been updated by a
	194	* writer who is altering the value in some way (updating
	195	* counter or resetting the error), or another reader who is
	196	* just setting the "seen" flag. Either outcome is OK, and we
	197	* can advance "since" and return an error based on what we
	198	* have.
	199	*/
	200	new = old | ERRSEQ_SEEN;
	201	if (new != old)
	202	cmpxchg(eseq, old, new);
	203	*since = new;
	204	err = -(new & MAX_ERRNO);
	205	}
	206	return err;
	207	}
	208	EXPORT_SYMBOL(errseq_check_and_advance);