-rw-r--r--  drivers/block/drbd/drbd_actlog.c  |  50
-rw-r--r--  drivers/block/drbd/drbd_nl.c      |   4
-rw-r--r--  include/linux/lru_cache.h         |  68
-rw-r--r--  lib/lru_cache.c                   | 243
4 files changed, 225 insertions(+), 140 deletions(-)
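
At the API level, the rework replaces the per-element lc_changed() call with a per-transaction lc_committed(), lets lc_create() bound how many label changes may pile up before a commit is required, and moves lc_try_lock()/lc_is_used() out of line with new semantics. A condensed before/after of the prototypes, taken from the hunks below (the comments are mine, not part of the patch):

	/* before */
	struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
			unsigned e_count, size_t e_size, size_t e_off);
	void lc_changed(struct lru_cache *lc, struct lc_element *e);	/* one element per call */

	/* after */
	struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
			unsigned max_pending_changes,
			unsigned e_count, size_t e_size, size_t e_off);
	void lc_committed(struct lru_cache *lc);	/* commits everything on to_be_changed */
	int lc_try_lock(struct lru_cache *lc);		/* "clean" sets only, now out of line */
	bool lc_is_used(struct lru_cache *lc, unsigned int enr);	/* also sees pending elements */
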
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1ce3de6eed1b..44097c87fed7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
175{ 175{
176 struct lc_element *al_ext; 176 struct lc_element *al_ext;
177 struct lc_element *tmp; 177 struct lc_element *tmp;
178 unsigned long al_flags = 0;
179 int wake; 178 int wake;
180 179
181 spin_lock_irq(&mdev->al_lock); 180 spin_lock_irq(&mdev->al_lock);
@@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
190 return NULL; 189 return NULL;
191 } 190 }
192 } 191 }
193 al_ext = lc_get(mdev->act_log, enr); 192 al_ext = lc_get(mdev->act_log, enr);
194 al_flags = mdev->act_log->flags;
195 spin_unlock_irq(&mdev->al_lock); 193 spin_unlock_irq(&mdev->al_lock);
196
197 /*
198 if (!al_ext) {
199 if (al_flags & LC_STARVING)
200 dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
201 if (al_flags & LC_DIRTY)
202 dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
203 }
204 */
205
206 return al_ext; 194 return al_ext;
207} 195}
208 196
@@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
235 mdev->al_writ_cnt++; 223 mdev->al_writ_cnt++;
236 224
237 spin_lock_irq(&mdev->al_lock); 225 spin_lock_irq(&mdev->al_lock);
238 lc_changed(mdev->act_log, al_ext); 226 lc_committed(mdev->act_log);
239 spin_unlock_irq(&mdev->al_lock); 227 spin_unlock_irq(&mdev->al_lock);
240 wake_up(&mdev->al_wait); 228 wake_up(&mdev->al_wait);
241 } 229 }
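
With lc_committed() the caller no longer names the element it pulled in; once the on-disk activity-log update has completed, whatever accumulated on the to_be_changed list is committed in one call. A hedged sketch of the surrounding drbd_al_begin_io() flow: only the lines visible in the hunk above are literally from the patch, the rest follows the pre-existing DRBD code, and the on-disk write itself is elided.

	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));

	if (al_ext->lc_number != enr) {
		/* a new extent was pulled into the activity log:
		 * write the on-disk AL transaction first (elided) ... */
		mdev->al_writ_cnt++;

		/* ... then commit the in-memory change in one go */
		spin_lock_irq(&mdev->al_lock);
		lc_committed(mdev->act_log);
		spin_unlock_irq(&mdev->al_lock);
		wake_up(&mdev->al_wait);
	}
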
@@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
601 struct lc_element *al_ext; 589 struct lc_element *al_ext;
602 int i; 590 int i;
603 591
604 D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); 592 D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));
605 593
606 for (i = 0; i < mdev->act_log->nr_elements; i++) { 594 for (i = 0; i < mdev->act_log->nr_elements; i++) {
607 al_ext = lc_element_by_index(mdev->act_log, i); 595 al_ext = lc_element_by_index(mdev->act_log, i);
@@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
708 } 696 }
709 ext->rs_left = rs_left; 697 ext->rs_left = rs_left;
710 ext->rs_failed = success ? 0 : count; 698 ext->rs_failed = success ? 0 : count;
711 lc_changed(mdev->resync, &ext->lce); 699 /* we don't keep a persistent log of the resync lru,
700 * we can commit any change right away. */
701 lc_committed(mdev->resync);
712 } 702 }
713 lc_put(mdev->resync, &ext->lce); 703 lc_put(mdev->resync, &ext->lce);
714 /* no race, we are within the al_lock! */ 704 /* no race, we are within the al_lock! */
@@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
892 if (bm_ext->lce.lc_number != enr) { 882 if (bm_ext->lce.lc_number != enr) {
893 bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); 883 bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
894 bm_ext->rs_failed = 0; 884 bm_ext->rs_failed = 0;
895 lc_changed(mdev->resync, &bm_ext->lce); 885 lc_committed(mdev->resync);
896 wakeup = 1; 886 wakeup = 1;
897 } 887 }
898 if (bm_ext->lce.refcnt == 1) 888 if (bm_ext->lce.refcnt == 1)
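
The resync LRU keeps no persistent log, so a change can be committed in the same critical section that pulled the element in; there is never more than the one pending change. A minimal sketch of that pattern, modeled on _bme_get() above (error handling and the BME_* bookkeeping are left out):

	spin_lock_irq(&mdev->al_lock);
	e = lc_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext && bm_ext->lce.lc_number != enr) {
		/* freshly recycled element: set up the payload for the new label */
		bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
		bm_ext->rs_failed = 0;
		/* nothing to write out, commit right away */
		lc_committed(mdev->resync);
	}
	spin_unlock_irq(&mdev->al_lock);
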
@@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
908 if (rs_flags & LC_STARVING) 898 if (rs_flags & LC_STARVING)
909 dev_warn(DEV, "Have to wait for element" 899 dev_warn(DEV, "Have to wait for element"
910 " (resync LRU too small?)\n"); 900 " (resync LRU too small?)\n");
911 BUG_ON(rs_flags & LC_DIRTY); 901 BUG_ON(rs_flags & LC_LOCKED);
912 } 902 }
913 903
914 return bm_ext; 904 return bm_ext;
@@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
916 906
917static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) 907static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
918{ 908{
919 struct lc_element *al_ext; 909 int rv;
920 int rv = 0;
921 910
922 spin_lock_irq(&mdev->al_lock); 911 spin_lock_irq(&mdev->al_lock);
923 if (unlikely(enr == mdev->act_log->new_number)) 912 rv = lc_is_used(mdev->act_log, enr);
924 rv = 1;
925 else {
926 al_ext = lc_find(mdev->act_log, enr);
927 if (al_ext) {
928 if (al_ext->refcnt)
929 rv = 1;
930 }
931 }
932 spin_unlock_irq(&mdev->al_lock); 913 spin_unlock_irq(&mdev->al_lock);
933 914
934 /*
935 if (unlikely(rv)) {
936 dev_info(DEV, "Delaying sync read until app's write is done\n");
937 }
938 */
939 return rv; 915 return rv;
940} 916}
941 917
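
The logic that _is_in_al() used to open-code, a special case for act_log->new_number plus a refcnt test on lc_find(), is exactly what lc_is_used() provides now, since it also finds elements that are merely pending commit. Roughly (my restatement of the removed lines, not code from the patch):

	/* old, open-coded:
	 *	rv = (enr == mdev->act_log->new_number) ||
	 *	     ((al_ext = lc_find(mdev->act_log, enr)) && al_ext->refcnt);
	 * new, with the special case folded into the library: */
	rv = lc_is_used(mdev->act_log, enr);
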
@@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1065 if (rs_flags & LC_STARVING) 1041 if (rs_flags & LC_STARVING)
1066 dev_warn(DEV, "Have to wait for element" 1042 dev_warn(DEV, "Have to wait for element"
1067 " (resync LRU too small?)\n"); 1043 " (resync LRU too small?)\n");
1068 BUG_ON(rs_flags & LC_DIRTY); 1044 BUG_ON(rs_flags & LC_LOCKED);
1069 goto try_again; 1045 goto try_again;
1070 } 1046 }
1071 if (bm_ext->lce.lc_number != enr) { 1047 if (bm_ext->lce.lc_number != enr) {
1072 bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); 1048 bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
1073 bm_ext->rs_failed = 0; 1049 bm_ext->rs_failed = 0;
1074 lc_changed(mdev->resync, &bm_ext->lce); 1050 lc_committed(mdev->resync);
1075 wake_up(&mdev->al_wait); 1051 wake_up(&mdev->al_wait);
1076 D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); 1052 D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
1077 } 1053 }
@@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1082 } 1058 }
1083check_al: 1059check_al:
1084 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { 1060 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
1085 if (unlikely(al_enr+i == mdev->act_log->new_number))
1086 goto try_again;
1087 if (lc_is_used(mdev->act_log, al_enr+i)) 1061 if (lc_is_used(mdev->act_log, al_enr+i))
1088 goto try_again; 1062 goto try_again;
1089 } 1063 }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ae8f42e38e4f..0a92f5226c2a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
760 760
761 in_use = 0; 761 in_use = 0;
762 t = mdev->act_log; 762 t = mdev->act_log;
763 n = lc_create("act_log", drbd_al_ext_cache, 763 n = lc_create("act_log", drbd_al_ext_cache, 1,
764 mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); 764 mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
765 765
766 if (n == NULL) { 766 if (n == NULL) {
@@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1016 } 1016 }
1017 1017
1018 resync_lru = lc_create("resync", drbd_bm_ext_cache, 1018 resync_lru = lc_create("resync", drbd_bm_ext_cache,
1019 61, sizeof(struct bm_extent), 1019 1, 61, sizeof(struct bm_extent),
1020 offsetof(struct bm_extent, lce)); 1020 offsetof(struct bm_extent, lce));
1021 if (!resync_lru) { 1021 if (!resync_lru) {
1022 retcode = ERR_NOMEM; 1022 retcode = ERR_NOMEM;
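
Both DRBD users pass the new max_pending_changes argument explicitly; here it is 1, i.e. one label change per transaction. The e_size/e_off pair still describes where the struct lc_element lives inside the tracked object. A sketch for the resync case (struct bm_extent as I recall it from drbd_int.h; the field comments are mine):

	struct bm_extent {
		int rs_left;		/* bits still out of sync in this extent */
		int rs_failed;		/* failed resync requests in this extent */
		unsigned long flags;	/* BME_* bits */
		struct lc_element lce;	/* embedded cache element */
	};

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			1,			/* max_pending_changes */
			61,			/* simultaneously active extents */
			sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
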
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 4cceafb0732d..cbafae40c649 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@ struct lc_element {
166 /* if we want to track a larger set of objects, 166 /* if we want to track a larger set of objects,
167 * it needs to become arch independend u64 */ 167 * it needs to become arch independend u64 */
168 unsigned lc_number; 168 unsigned lc_number;
169
170 /* special label when on free list */ 169 /* special label when on free list */
171#define LC_FREE (~0U) 170#define LC_FREE (~0U)
171
172 /* for pending changes */
173 unsigned lc_new_number;
172}; 174};
173 175
174struct lru_cache { 176struct lru_cache {
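
Each element now carries two labels: lc_number is the committed label, lc_new_number the label it is being changed to. A sketch of the invariant (not part of the patch):

	if (e->lc_number == e->lc_new_number) {
		/* normal member of the set: on the lru or in_use list,
		 * hashed under lc_number */
	} else {
		/* pending change: on the to_be_changed list, already hashed
		 * under lc_new_number, with a refcnt taken by the lc_get()
		 * caller; becomes "normal" again in lc_committed() */
	}
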
@@ -176,6 +178,7 @@ struct lru_cache {
176 struct list_head lru; 178 struct list_head lru;
177 struct list_head free; 179 struct list_head free;
178 struct list_head in_use; 180 struct list_head in_use;
181 struct list_head to_be_changed;
179 182
180 /* the pre-created kmem cache to allocate the objects from */ 183 /* the pre-created kmem cache to allocate the objects from */
181 struct kmem_cache *lc_cache; 184 struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
186 size_t element_off; 189 size_t element_off;
187 190
188 /* number of elements (indices) */ 191 /* number of elements (indices) */
189 unsigned int nr_elements; 192 unsigned int nr_elements;
190 /* Arbitrary limit on maximum tracked objects. Practical limit is much 193 /* Arbitrary limit on maximum tracked objects. Practical limit is much
191 * lower due to allocation failures, probably. For typical use cases, 194 * lower due to allocation failures, probably. For typical use cases,
192 * nr_elements should be a few thousand at most. 195 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@ struct lru_cache {
194 * 8 high bits of .lc_index to be overloaded with flags in the future. */ 197 * 8 high bits of .lc_index to be overloaded with flags in the future. */
195#define LC_MAX_ACTIVE (1<<24) 198#define LC_MAX_ACTIVE (1<<24)
196 199
200 /* allow to accumulate a few (index:label) changes,
201 * but no more than max_pending_changes */
202 unsigned int max_pending_changes;
203 /* number of elements currently on to_be_changed list */
204 unsigned int pending_changes;
205
197 /* statistics */ 206 /* statistics */
198 unsigned used; /* number of lelements currently on in_use list */ 207 unsigned used; /* number of elements currently on in_use list */
199 unsigned long hits, misses, starving, dirty, changed; 208 unsigned long hits, misses, starving, locked, changed;
200 209
201 /* see below: flag-bits for lru_cache */ 210 /* see below: flag-bits for lru_cache */
202 unsigned long flags; 211 unsigned long flags;
203 212
204 /* when changing the label of an index element */
205 unsigned int new_number;
206
207 /* for paranoia when changing the label of an index element */
208 struct lc_element *changing_element;
209 213
210 void *lc_private; 214 void *lc_private;
211 const char *name; 215 const char *name;
@@ -221,10 +225,15 @@ enum {
221 /* debugging aid, to catch concurrent access early. 225 /* debugging aid, to catch concurrent access early.
222 * user needs to guarantee exclusive access by proper locking! */ 226 * user needs to guarantee exclusive access by proper locking! */
223 __LC_PARANOIA, 227 __LC_PARANOIA,
224 /* if we need to change the set, but currently there is a changing 228
225 * transaction pending, we are "dirty", and must deferr further 229 /* annotate that the set is "dirty", possibly accumulating further
226 * changing requests */ 230 * changes, until a transaction is finally triggered */
227 __LC_DIRTY, 231 __LC_DIRTY,
232
233 /* Locked, no further changes allowed.
234 * Also used to serialize changing transactions. */
235 __LC_LOCKED,
236
228 /* if we need to change the set, but currently there is no free nor 237 /* if we need to change the set, but currently there is no free nor
229 * unused element available, we are "starving", and must not give out 238 * unused element available, we are "starving", and must not give out
230 * further references, to guarantee that eventually some refcnt will 239 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@ enum {
236}; 245};
237#define LC_PARANOIA (1<<__LC_PARANOIA) 246#define LC_PARANOIA (1<<__LC_PARANOIA)
238#define LC_DIRTY (1<<__LC_DIRTY) 247#define LC_DIRTY (1<<__LC_DIRTY)
248#define LC_LOCKED (1<<__LC_LOCKED)
239#define LC_STARVING (1<<__LC_STARVING) 249#define LC_STARVING (1<<__LC_STARVING)
240 250
241extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, 251extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
252 unsigned max_pending_changes,
242 unsigned e_count, size_t e_size, size_t e_off); 253 unsigned e_count, size_t e_size, size_t e_off);
243extern void lc_reset(struct lru_cache *lc); 254extern void lc_reset(struct lru_cache *lc);
244extern void lc_destroy(struct lru_cache *lc); 255extern void lc_destroy(struct lru_cache *lc);
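
__LC_DIRTY changes meaning: it no longer blocks further changes, it only records that changes have accumulated since the last commit. The new __LC_LOCKED bit is what actually freezes the set and serializes transactions. How the two bits are meant to be read (sketch, not part of the patch):

	if (test_bit(__LC_LOCKED, &lc->flags)) {
		/* a transaction (or an explicit lc_try_lock()) is in progress:
		 * lc_get() still serves hits, but refuses to change the set */
	} else if (test_bit(__LC_DIRTY, &lc->flags)) {
		/* changes are parked on to_be_changed; someone should
		 * lc_try_lock_for_transaction(), write them out,
		 * then lc_committed() and lc_unlock() */
	}
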
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
249extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); 260extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
250extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); 261extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
251extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); 262extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
252extern void lc_changed(struct lru_cache *lc, struct lc_element *e); 263extern void lc_committed(struct lru_cache *lc);
253 264
254struct seq_file; 265struct seq_file;
255extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); 266extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
258 void (*detail) (struct seq_file *, struct lc_element *)); 269 void (*detail) (struct seq_file *, struct lc_element *));
259 270
260/** 271/**
261 * lc_try_lock - can be used to stop lc_get() from changing the tracked set 272 * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
262 * @lc: the lru cache to operate on 273 * @lc: the lru cache to operate on
263 * 274 *
264 * Note that the reference counts and order on the active and lru lists may 275 * Allows (expects) the set to be "dirty". Note that the reference counts and
265 * still change. Returns true if we acquired the lock. 276 * order on the active and lru lists may still change. Used to serialize
 277 changing transactions. Returns true if we acquired the lock.
266 */ 278 */
267static inline int lc_try_lock(struct lru_cache *lc) 279static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
268{ 280{
269 return !test_and_set_bit(__LC_DIRTY, &lc->flags); 281 return !test_and_set_bit(__LC_LOCKED, &lc->flags);
270} 282}
271 283
272/** 284/**
285 * lc_try_lock - variant to stop lc_get() from changing the tracked set
286 * @lc: the lru cache to operate on
287 *
288 * Note that the reference counts and order on the active and lru lists may
 290 still change. Only works on a "clean" set. Returns true if we acquired the
290 * lock, which means there are no pending changes, and any further attempt to
291 * change the set will not succeed until the next lc_unlock().
292 */
293extern int lc_try_lock(struct lru_cache *lc);
294
295/**
273 * lc_unlock - unlock @lc, allow lc_get() to change the set again 296 * lc_unlock - unlock @lc, allow lc_get() to change the set again
274 * @lc: the lru cache to operate on 297 * @lc: the lru cache to operate on
275 */ 298 */
276static inline void lc_unlock(struct lru_cache *lc) 299static inline void lc_unlock(struct lru_cache *lc)
277{ 300{
278 clear_bit_unlock(__LC_DIRTY, &lc->flags); 301 clear_bit(__LC_DIRTY, &lc->flags);
302 clear_bit_unlock(__LC_LOCKED, &lc->flags);
279} 303}
280 304
281static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) 305extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
282{
283 struct lc_element *e = lc_find(lc, enr);
284 return e && e->refcnt;
285}
286 306
287#define lc_entry(ptr, type, member) \ 307#define lc_entry(ptr, type, member) \
288 container_of(ptr, type, member) 308 container_of(ptr, type, member)
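
Putting the header pieces together: lc_try_lock_for_transaction() is for whoever writes out the accumulated changes, lc_try_lock() is for callers that need the set clean and frozen (for instance before shrinking or re-creating it, as drbd_al_shrink() now asserts __LC_LOCKED). A hedged usage sketch; their_lock and their_waitqueue stand in for whatever external serialization the user of the cache already has (in DRBD: al_lock and al_wait) and are not part of this API:

	/* transaction side: allowed even while the set is dirty */
	if (lc_try_lock_for_transaction(lc)) {
		/* write the accumulated (index: label) changes to stable storage ... */
		spin_lock_irq(&their_lock);
		lc_committed(lc);
		spin_unlock_irq(&their_lock);
		lc_unlock(lc);			/* clears LC_DIRTY and LC_LOCKED */
	}

	/* freeze side: only succeeds on a clean set, no changes until lc_unlock() */
	wait_event(their_waitqueue, lc_try_lock(lc));
	/* ... resize, dump, or otherwise rely on a stable set ... */
	lc_unlock(lc);
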
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 17621684758a..d71d89498943 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
55 BUG_ON(i >= lc_->nr_elements); \ 55 BUG_ON(i >= lc_->nr_elements); \
56 BUG_ON(lc_->lc_element[i] != e_); } while (0) 56 BUG_ON(lc_->lc_element[i] != e_); } while (0)
57 57
58
59/* We need to atomically
60 * - try to grab the lock (set LC_LOCKED)
61 * - only if there is no pending transaction
62 * (neither LC_DIRTY nor LC_STARVING is set)
63 * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
64 * it is not sufficient to just say
65 * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
66 */
67int lc_try_lock(struct lru_cache *lc)
68{
69 unsigned long val;
70 do {
71 val = cmpxchg(&lc->flags, 0, LC_LOCKED);
72 } while (unlikely (val == LC_PARANOIA));
73 /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
74 return 0 == val;
75#if 0
76 /* Alternative approach, spin in case someone enters or leaves a
77 * PARANOIA_ENTRY()/RETURN() section. */
78 unsigned long old, new, val;
79 do {
80 old = lc->flags & LC_PARANOIA;
81 new = old | LC_LOCKED;
82 val = cmpxchg(&lc->flags, old, new);
83 } while (unlikely (val == (old ^ LC_PARANOIA)));
84 return old == val;
85#endif
86}
87
58/** 88/**
59 * lc_create - prepares to track objects in an active set 89 * lc_create - prepares to track objects in an active set
60 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details 90 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
91 * @max_pending_changes: maximum changes to accumulate until a transaction is required
61 * @e_count: number of elements allowed to be active simultaneously 92 * @e_count: number of elements allowed to be active simultaneously
62 * @e_size: size of the tracked objects 93 * @e_size: size of the tracked objects
63 * @e_off: offset to the &struct lc_element member in a tracked object 94 * @e_off: offset to the &struct lc_element member in a tracked object
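
The out-of-line lc_try_lock() has to co-exist with PARANOIA_ENTRY(), which temporarily sets __LC_PARANOIA in lc->flags to catch concurrent access; that is why a single cmpxchg against 0 is not enough and the loop retries while only the paranoia bit is set. A sketch of the interplay; the macro bodies below are my recollection of what lib/lru_cache.c uses, not part of this patch:

	#define PARANOIA_ENTRY() do {					\
		BUG_ON(!lc);						\
		BUG_ON(!lc->nr_elements);				\
		BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags));	\
	} while (0)

	#define RETURN(x...) do {					\
		clear_bit_unlock(__LC_PARANOIA, &lc->flags);		\
		return x; } while (0)

	/* so, inside lc_try_lock():
	 *	flags == 0		cmpxchg succeeds, we own LC_LOCKED
	 *	flags == LC_PARANOIA	someone is merely inside an lc_* call, retry
	 *	anything else		LC_DIRTY/LC_LOCKED/LC_STARVING set, give up */
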
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
66 * or NULL on (allocation) failure. 97 * or NULL on (allocation) failure.
67 */ 98 */
68struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, 99struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
100 unsigned max_pending_changes,
69 unsigned e_count, size_t e_size, size_t e_off) 101 unsigned e_count, size_t e_size, size_t e_off)
70{ 102{
71 struct hlist_head *slot = NULL; 103 struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
98 INIT_LIST_HEAD(&lc->in_use); 130 INIT_LIST_HEAD(&lc->in_use);
99 INIT_LIST_HEAD(&lc->lru); 131 INIT_LIST_HEAD(&lc->lru);
100 INIT_LIST_HEAD(&lc->free); 132 INIT_LIST_HEAD(&lc->free);
133 INIT_LIST_HEAD(&lc->to_be_changed);
101 134
102 lc->name = name; 135 lc->name = name;
103 lc->element_size = e_size; 136 lc->element_size = e_size;
104 lc->element_off = e_off; 137 lc->element_off = e_off;
105 lc->nr_elements = e_count; 138 lc->nr_elements = e_count;
106 lc->new_number = LC_FREE; 139 lc->max_pending_changes = max_pending_changes;
107 lc->lc_cache = cache; 140 lc->lc_cache = cache;
108 lc->lc_element = element; 141 lc->lc_element = element;
109 lc->lc_slot = slot; 142 lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
117 e = p + e_off; 150 e = p + e_off;
118 e->lc_index = i; 151 e->lc_index = i;
119 e->lc_number = LC_FREE; 152 e->lc_number = LC_FREE;
153 e->lc_new_number = LC_FREE;
120 list_add(&e->list, &lc->free); 154 list_add(&e->list, &lc->free);
121 element[i] = e; 155 element[i] = e;
122 } 156 }
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
175 INIT_LIST_HEAD(&lc->in_use); 209 INIT_LIST_HEAD(&lc->in_use);
176 INIT_LIST_HEAD(&lc->lru); 210 INIT_LIST_HEAD(&lc->lru);
177 INIT_LIST_HEAD(&lc->free); 211 INIT_LIST_HEAD(&lc->free);
212 INIT_LIST_HEAD(&lc->to_be_changed);
178 lc->used = 0; 213 lc->used = 0;
179 lc->hits = 0; 214 lc->hits = 0;
180 lc->misses = 0; 215 lc->misses = 0;
181 lc->starving = 0; 216 lc->starving = 0;
182 lc->dirty = 0; 217 lc->locked = 0;
183 lc->changed = 0; 218 lc->changed = 0;
219 lc->pending_changes = 0;
184 lc->flags = 0; 220 lc->flags = 0;
185 lc->changing_element = NULL;
186 lc->new_number = LC_FREE;
187 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); 221 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
188 222
189 for (i = 0; i < lc->nr_elements; i++) { 223 for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
194 /* re-init it */ 228 /* re-init it */
195 e->lc_index = i; 229 e->lc_index = i;
196 e->lc_number = LC_FREE; 230 e->lc_number = LC_FREE;
231 e->lc_new_number = LC_FREE;
197 list_add(&e->list, &lc->free); 232 list_add(&e->list, &lc->free);
198 } 233 }
199} 234}
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
208 /* NOTE: 243 /* NOTE:
209 * total calls to lc_get are 244 * total calls to lc_get are
210 * (starving + hits + misses) 245 * (starving + hits + misses)
211 * misses include "dirty" count (update from an other thread in 246 * misses include "locked" count (update from an other thread in
212 * progress) and "changed", when this in fact lead to an successful 247 * progress) and "changed", when this in fact lead to an successful
213 * update of the cache. 248 * update of the cache.
214 */ 249 */
215 return seq_printf(seq, "\t%s: used:%u/%u " 250 return seq_printf(seq, "\t%s: used:%u/%u "
216 "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", 251 "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
217 lc->name, lc->used, lc->nr_elements, 252 lc->name, lc->used, lc->nr_elements,
218 lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); 253 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
219} 254}
220 255
221static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 256static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
224} 259}
225 260
226 261
227/** 262static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
228 * lc_find - find element by label, if present in the hash table 263 bool include_changing)
229 * @lc: The lru_cache object
230 * @enr: element number
231 *
232 * Returns the pointer to an element, if the element with the requested
233 * "label" or element number is present in the hash table,
234 * or NULL if not found. Does not change the refcnt.
235 */
236struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
237{ 264{
238 struct hlist_node *n; 265 struct hlist_node *n;
239 struct lc_element *e; 266 struct lc_element *e;
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
241 BUG_ON(!lc); 268 BUG_ON(!lc);
242 BUG_ON(!lc->nr_elements); 269 BUG_ON(!lc->nr_elements);
243 hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { 270 hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
244 if (e->lc_number == enr) 271 /* "about to be changed" elements, pending transaction commit,
272 * are hashed by their "new number". "Normal" elements have
273 * lc_number == lc_new_number. */
274 if (e->lc_new_number != enr)
275 continue;
276 if (e->lc_new_number == e->lc_number || include_changing)
245 return e; 277 return e;
278 break;
246 } 279 }
247 return NULL; 280 return NULL;
248} 281}
249 282
250/* returned element will be "recycled" immediately */ 283/**
251static struct lc_element *lc_evict(struct lru_cache *lc) 284 * lc_find - find element by label, if present in the hash table
285 * @lc: The lru_cache object
286 * @enr: element number
287 *
288 * Returns the pointer to an element, if the element with the requested
289 * "label" or element number is present in the hash table,
290 * or NULL if not found. Does not change the refcnt.
291 * Ignores elements that are "about to be used", i.e. not yet in the active
292 * set, but still pending transaction commit.
293 */
294struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
252{ 295{
253 struct list_head *n; 296 return __lc_find(lc, enr, 0);
254 struct lc_element *e; 297}
255
256 if (list_empty(&lc->lru))
257 return NULL;
258
259 n = lc->lru.prev;
260 e = list_entry(n, struct lc_element, list);
261
262 PARANOIA_LC_ELEMENT(lc, e);
263 298
264 list_del(&e->list); 299/**
265 hlist_del(&e->colision); 300 * lc_is_used - find element by label
266 return e; 301 * @lc: The lru_cache object
302 * @enr: element number
303 *
304 * Returns true, if the element with the requested "label" or element number is
305 * present in the hash table, and is used (refcnt > 0).
306 * Also finds elements that are not _currently_ used but only "about to be
307 * used", i.e. on the "to_be_changed" list, pending transaction commit.
308 */
309bool lc_is_used(struct lru_cache *lc, unsigned int enr)
310{
311 struct lc_element *e = __lc_find(lc, enr, 1);
312 return e && e->refcnt;
267} 313}
268 314
269/** 315/**
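
The lc_number/lc_new_number pair gives three observable states per label, and lc_find() and lc_is_used() deliberately disagree about the middle one (sketch, derived from __lc_find() above):

	struct lc_element *e = lc_find(lc, enr);	/* committed elements only */
	bool busy = lc_is_used(lc, enr);		/* committed or pending, refcnt > 0 */

	/* per element, hashed under lc_new_number:
	 *
	 *	lc_number == lc_new_number == enr	committed, in the active set
	 *		lc_find()    -> the element
	 *		lc_is_used() -> true iff refcnt > 0
	 *
	 *	lc_number != lc_new_number == enr	pending, on to_be_changed
	 *		lc_find()    -> NULL (not yet part of the active set)
	 *		lc_is_used() -> true (lc_get() already took a reference)
	 *
	 *	lc_new_number != enr			not tracked under this label
	 *		both -> NULL / false				*/
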
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
280 PARANOIA_LC_ELEMENT(lc, e); 326 PARANOIA_LC_ELEMENT(lc, e);
281 BUG_ON(e->refcnt); 327 BUG_ON(e->refcnt);
282 328
283 e->lc_number = LC_FREE; 329 e->lc_number = e->lc_new_number = LC_FREE;
284 hlist_del_init(&e->colision); 330 hlist_del_init(&e->colision);
285 list_move(&e->list, &lc->free); 331 list_move(&e->list, &lc->free);
286 RETURN(); 332 RETURN();
287} 333}
288 334
289static struct lc_element *lc_get_unused_element(struct lru_cache *lc) 335static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
290{ 336{
291 struct list_head *n; 337 struct list_head *n;
338 struct lc_element *e;
339
340 if (!list_empty(&lc->free))
341 n = lc->free.next;
342 else if (!list_empty(&lc->lru))
343 n = lc->lru.prev;
344 else
345 return NULL;
346
347 e = list_entry(n, struct lc_element, list);
348 PARANOIA_LC_ELEMENT(lc, e);
292 349
293 if (list_empty(&lc->free)) 350 e->lc_new_number = new_number;
294 return lc_evict(lc); 351 if (!hlist_unhashed(&e->colision))
352 __hlist_del(&e->colision);
353 hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
354 list_move(&e->list, &lc->to_be_changed);
295 355
296 n = lc->free.next; 356 return e;
297 list_del(n);
298 return list_entry(n, struct lc_element, list);
299} 357}
300 358
301static int lc_unused_element_available(struct lru_cache *lc) 359static int lc_unused_element_available(struct lru_cache *lc)
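
lc_prepare_for_change() merges the old lc_evict()/lc_get_unused_element() pair: it prefers a free element, only then recycles the LRU tail, re-hashes the element under the new label immediately, and parks it on to_be_changed until lc_committed(). The caller-visible consequence (sketch of the __lc_get() side, shown in a later hunk):

	e = lc_prepare_for_change(lc, enr);	/* NULL only if free and lru are both empty */
	BUG_ON(!e);				/* __lc_get() already checked lc_unused_element_available() */

	/* from here until lc_committed():
	 *	e->lc_new_number == enr, e->lc_number still holds the old label,
	 *	lc_find(lc, enr) == NULL, but lc_is_used(lc, enr) is true */
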
@@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
318 RETURN(NULL); 376 RETURN(NULL);
319 } 377 }
320 378
321 e = lc_find(lc, enr); 379 e = __lc_find(lc, enr, 1);
322 if (e) { 380 /* if lc_new_number != lc_number,
381 * this enr is currently being pulled in already,
382 * and will be available once the pending transaction
383 * has been committed. */
384 if (e && e->lc_new_number == e->lc_number) {
323 ++lc->hits; 385 ++lc->hits;
324 if (e->refcnt++ == 0) 386 if (e->refcnt++ == 0)
325 lc->used++; 387 lc->used++;
@@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
331 if (!may_change) 393 if (!may_change)
332 RETURN(NULL); 394 RETURN(NULL);
333 395
396 /* It has been found above, but on the "to_be_changed" list, not yet
397 * committed. Don't pull it in twice, wait for the transaction, then
398 * try again */
399 if (e)
400 RETURN(NULL);
401
402 /* To avoid races with lc_try_lock(), first, mark us dirty
403 * (using test_and_set_bit, as it implies memory barriers), ... */
404 test_and_set_bit(__LC_DIRTY, &lc->flags);
405
406 /* ... only then check if it is locked anyways. If lc_unlock clears
407 * the dirty bit again, that's not a problem, we will come here again.
408 */
409 if (test_bit(__LC_LOCKED, &lc->flags)) {
410 ++lc->locked;
411 RETURN(NULL);
412 }
413
334 /* In case there is nothing available and we can not kick out 414 /* In case there is nothing available and we can not kick out
335 * the LRU element, we have to wait ... 415 * the LRU element, we have to wait ...
336 */ 416 */
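
The ordering above is what keeps a concurrent lc_try_lock() honest: the would-be changer first publishes __LC_DIRTY (test_and_set_bit implies the necessary barriers) and only then looks at __LC_LOCKED, while lc_try_lock() takes LC_LOCKED only via a cmpxchg that requires LC_DIRTY to be clear. Condensed (comments mine):

	/* changer side (__lc_get)			locker side (lc_try_lock)
	 *
	 * test_and_set_bit(__LC_DIRTY, &lc->flags);	cmpxchg(&lc->flags, 0, LC_LOCKED);
	 * if (test_bit(__LC_LOCKED, &lc->flags))	// succeeds only while neither
	 *	back off, RETURN(NULL);			// LC_DIRTY nor LC_LOCKED is set
	 *
	 * either the cmpxchg observes LC_DIRTY and fails, or the changer observes
	 * LC_LOCKED and refuses to touch the set -- never both proceed. */
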
@@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
339 RETURN(NULL); 419 RETURN(NULL);
340 } 420 }
341 421
342 /* it was not present in the active set. 422 /* It was not present in the active set. We are going to recycle an
343 * we are going to recycle an unused (or even "free") element. 423 * unused (or even "free") element, but we won't accumulate more than
344 * user may need to commit a transaction to record that change. 424 * max_pending_changes changes. */
345 * we serialize on flags & LC_DIRTY */ 425 if (lc->pending_changes >= lc->max_pending_changes)
346 if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
347 ++lc->dirty;
348 RETURN(NULL); 426 RETURN(NULL);
349 }
350 427
351 e = lc_get_unused_element(lc); 428 e = lc_prepare_for_change(lc, enr);
352 BUG_ON(!e); 429 BUG_ON(!e);
353 430
354 clear_bit(__LC_STARVING, &lc->flags); 431 clear_bit(__LC_STARVING, &lc->flags);
355 BUG_ON(++e->refcnt != 1); 432 BUG_ON(++e->refcnt != 1);
356 lc->used++; 433 lc->used++;
357 434 lc->pending_changes++;
358 lc->changing_element = e;
359 lc->new_number = enr;
360 435
361 RETURN(e); 436 RETURN(e);
362} 437}
@@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
388 * pointer to an UNUSED element with some different element number, 463 * pointer to an UNUSED element with some different element number,
389 * where that different number may also be %LC_FREE. 464 * where that different number may also be %LC_FREE.
390 * 465 *
391 * In this case, the cache is marked %LC_DIRTY (blocking further changes), 466 * In this case, the cache is marked %LC_DIRTY,
392 * and the returned element pointer is removed from the lru list and 467 * so lc_try_lock() will no longer succeed.
393 * hash collision chains. The user now should do whatever housekeeping 468 * The returned element pointer is moved to the "to_be_changed" list,
394 * is necessary. 469 * and registered with the new element number on the hash collision chains,
395 * Then he must call lc_changed(lc,element_pointer), to finish 470 * so it is possible to pick it up from lc_is_used().
396 * the change. 471 * Up to "max_pending_changes" (see lc_create()) can be accumulated.
472 * The user now should do whatever housekeeping is necessary,
473 * typically serialize on lc_try_lock_for_transaction(), then call
474 * lc_committed(lc) and lc_unlock(), to finish the change.
397 * 475 *
398 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes 476 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
399 * any cache set change. 477 * any cache set change.
@@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
425} 503}
426 504
427/** 505/**
428 * lc_changed - tell @lc that the change has been recorded 506 * lc_committed - tell @lc that pending changes have been recorded
429 * @lc: the lru cache to operate on 507 * @lc: the lru cache to operate on
430 * @e: the element pending label change 508 *
509 * User is expected to serialize on explicit lc_try_lock_for_transaction()
510 * before the transaction is started, and later needs to lc_unlock() explicitly
511 * as well.
431 */ 512 */
432void lc_changed(struct lru_cache *lc, struct lc_element *e) 513void lc_committed(struct lru_cache *lc)
433{ 514{
515 struct lc_element *e, *tmp;
516
434 PARANOIA_ENTRY(); 517 PARANOIA_ENTRY();
435 BUG_ON(e != lc->changing_element); 518 list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
436 PARANOIA_LC_ELEMENT(lc, e); 519 /* count number of changes, not number of transactions */
437 ++lc->changed; 520 ++lc->changed;
438 e->lc_number = lc->new_number; 521 e->lc_number = e->lc_new_number;
439 list_add(&e->list, &lc->in_use); 522 list_move(&e->list, &lc->in_use);
440 hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); 523 }
441 lc->changing_element = NULL; 524 lc->pending_changes = 0;
442 lc->new_number = LC_FREE;
443 clear_bit_unlock(__LC_DIRTY, &lc->flags);
444 RETURN(); 525 RETURN();
445} 526}
446 527
@@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
459 PARANOIA_ENTRY(); 540 PARANOIA_ENTRY();
460 PARANOIA_LC_ELEMENT(lc, e); 541 PARANOIA_LC_ELEMENT(lc, e);
461 BUG_ON(e->refcnt == 0); 542 BUG_ON(e->refcnt == 0);
462 BUG_ON(e == lc->changing_element); 543 BUG_ON(e->lc_number != e->lc_new_number);
463 if (--e->refcnt == 0) { 544 if (--e->refcnt == 0) {
464 /* move it to the front of LRU. */ 545 /* move it to the front of LRU. */
465 list_move(&e->list, &lc->lru); 546 list_move(&e->list, &lc->lru);
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
504void lc_set(struct lru_cache *lc, unsigned int enr, int index) 585void lc_set(struct lru_cache *lc, unsigned int enr, int index)
505{ 586{
506 struct lc_element *e; 587 struct lc_element *e;
588 struct list_head *lh;
507 589
508 if (index < 0 || index >= lc->nr_elements) 590 if (index < 0 || index >= lc->nr_elements)
509 return; 591 return;
510 592
511 e = lc_element_by_index(lc, index); 593 e = lc_element_by_index(lc, index);
512 e->lc_number = enr; 594 BUG_ON(e->lc_number != e->lc_new_number);
595 BUG_ON(e->refcnt != 0);
513 596
597 e->lc_number = e->lc_new_number = enr;
514 hlist_del_init(&e->colision); 598 hlist_del_init(&e->colision);
515 hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); 599 if (enr == LC_FREE)
516 list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); 600 lh = &lc->free;
601 else {
602 hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
603 lh = &lc->lru;
604 }
605 list_move(&e->list, lh);
517} 606}
518 607
519/** 608/**
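
lc_set() remains the way to force an (index: label) association without a transaction, e.g. when rebuilding the activity log from what is found on disk at attach time; with this patch it keeps lc_new_number in sync and, given LC_FREE, returns the element to the free list. A hedged sketch of such a restore loop; on_disk_extent_nr()/on_disk_slot_nr() are hypothetical helpers standing in for DRBD's transaction parsing:

	for (i = 0; i < nr_on_disk_entries; i++) {
		unsigned int enr = on_disk_extent_nr(i);
		int slot = on_disk_slot_nr(i);

		if (slot < 0 || slot >= mdev->act_log->nr_elements)
			continue;
		/* requires refcnt == 0 and no pending change for this element */
		lc_set(mdev->act_log, enr, slot);
	}
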
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
553EXPORT_SYMBOL(lc_find); 642EXPORT_SYMBOL(lc_find);
554EXPORT_SYMBOL(lc_get); 643EXPORT_SYMBOL(lc_get);
555EXPORT_SYMBOL(lc_put); 644EXPORT_SYMBOL(lc_put);
556EXPORT_SYMBOL(lc_changed); 645EXPORT_SYMBOL(lc_committed);
557EXPORT_SYMBOL(lc_element_by_index); 646EXPORT_SYMBOL(lc_element_by_index);
558EXPORT_SYMBOL(lc_index_of); 647EXPORT_SYMBOL(lc_index_of);
559EXPORT_SYMBOL(lc_seq_printf_stats); 648EXPORT_SYMBOL(lc_seq_printf_stats);
560EXPORT_SYMBOL(lc_seq_dump_details); 649EXPORT_SYMBOL(lc_seq_dump_details);
650EXPORT_SYMBOL(lc_try_lock);
651EXPORT_SYMBOL(lc_is_used);