 drivers/block/drbd/drbd_actlog.c |  50
 drivers/block/drbd/drbd_nl.c     |   4
 include/linux/lru_cache.h        |  68
 lib/lru_cache.c                  | 243
 4 files changed, 225 insertions(+), 140 deletions(-)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1ce3de6eed1b..44097c87fed7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 {
	struct lc_element *al_ext;
	struct lc_element *tmp;
-	unsigned long al_flags = 0;
	int wake;
 
	spin_lock_irq(&mdev->al_lock);
@@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
			return NULL;
		}
	}
	al_ext = lc_get(mdev->act_log, enr);
-	al_flags = mdev->act_log->flags;
	spin_unlock_irq(&mdev->al_lock);
-
-	/*
-	if (!al_ext) {
-		if (al_flags & LC_STARVING)
-			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
-		if (al_flags & LC_DIRTY)
-			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
-	}
-	*/
-
	return al_ext;
 }
 
@@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
		mdev->al_writ_cnt++;
 
		spin_lock_irq(&mdev->al_lock);
-		lc_changed(mdev->act_log, al_ext);
+		lc_committed(mdev->act_log);
		spin_unlock_irq(&mdev->al_lock);
		wake_up(&mdev->al_wait);
	}
@@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
	struct lc_element *al_ext;
	int i;
 
-	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));
 
	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(mdev->act_log, i);
@@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
		}
		ext->rs_left = rs_left;
		ext->rs_failed = success ? 0 : count;
-		lc_changed(mdev->resync, &ext->lce);
+		/* we don't keep a persistent log of the resync lru,
+		 * we can commit any change right away. */
+		lc_committed(mdev->resync);
	}
	lc_put(mdev->resync, &ext->lce);
	/* no race, we are within the al_lock! */
@@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
@@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
		if (rs_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for element"
			     " (resync LRU too small?)\n");
-		BUG_ON(rs_flags & LC_DIRTY);
+		BUG_ON(rs_flags & LC_LOCKED);
	}
 
	return bm_ext;
@@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 
 static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
 {
-	struct lc_element *al_ext;
-	int rv = 0;
+	int rv;
 
	spin_lock_irq(&mdev->al_lock);
-	if (unlikely(enr == mdev->act_log->new_number))
-		rv = 1;
-	else {
-		al_ext = lc_find(mdev->act_log, enr);
-		if (al_ext) {
-			if (al_ext->refcnt)
-				rv = 1;
-		}
-	}
+	rv = lc_is_used(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);
 
-	/*
-	if (unlikely(rv)) {
-		dev_info(DEV, "Delaying sync read until app's write is done\n");
-	}
-	*/
	return rv;
 }
 
@@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
-			BUG_ON(rs_flags & LC_DIRTY);
+			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
@@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
	}
 check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
-		if (unlikely(al_enr+i == mdev->act_log->new_number))
-			goto try_again;
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
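
The caller-visible change in drbd_actlog.c: instead of announcing each label
change with lc_changed(cache, element), the users above now call
lc_committed(cache) once per transaction, and the open-coded checks against
act_log->new_number are replaced by lc_is_used(), which also sees elements
still parked on the to_be_changed list. A minimal caller-side sketch, not code
from this patch; the my_dev context and the locking merely mirror the
drbd_conf usage above:

	#include <linux/lru_cache.h>
	#include <linux/spinlock.h>

	/* hypothetical context, standing in for struct drbd_conf */
	struct my_dev {
		spinlock_t lock;
		struct lru_cache *cache;
	};

	/* Reference extent @enr; returns NULL if the set is locked,
	 * starving, or the pending-change budget is exhausted. */
	static struct lc_element *my_get(struct my_dev *d, unsigned int enr)
	{
		struct lc_element *e;

		spin_lock_irq(&d->lock);
		e = lc_get(d->cache, enr); /* may park e on to_be_changed */
		spin_unlock_irq(&d->lock);
		return e;
	}

	/* After the transaction reached stable storage, activate all queued
	 * label changes at once; this replaces per-element lc_changed(). */
	static void my_commit(struct my_dev *d)
	{
		spin_lock_irq(&d->lock);
		lc_committed(d->cache);
		spin_unlock_irq(&d->lock);
	}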
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ae8f42e38e4f..0a92f5226c2a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 
	in_use = 0;
	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache,
+	n = lc_create("act_log", drbd_al_ext_cache, 1,
		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
 
	if (n == NULL) {
@@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
	}
 
	resync_lru = lc_create("resync", drbd_bm_ext_cache,
-			61, sizeof(struct bm_extent),
+			1, 61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
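
Both call sites pass 1 for the new max_pending_changes argument, keeping the
old one-change-per-transaction behaviour; a caller that wants to batch label
changes would pass a larger value. A hedged sketch of the new signature
(the cache name, the 16/256 sizes, my_kmem_cache, and my_extent below are
made up for illustration; assumes <linux/lru_cache.h>):

	/* tracked object embedding the lc_element, as struct bm_extent does */
	struct my_extent {
		struct lc_element lce;
		unsigned long payload;
	};

	static struct lru_cache *make_cache(struct kmem_cache *my_kmem_cache)
	{
		/* allow up to 16 label changes to accumulate before
		 * lc_get() starts returning NULL and a commit is needed */
		return lc_create("example", my_kmem_cache, 16,
				256, sizeof(struct my_extent),
				offsetof(struct my_extent, lce));
		/* NULL on allocation failure */
	}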
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 4cceafb0732d..cbafae40c649 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@ struct lc_element {
	/* if we want to track a larger set of objects,
	 * it needs to become arch independend u64 */
	unsigned lc_number;
-
	/* special label when on free list */
 #define LC_FREE (~0U)
+
+	/* for pending changes */
+	unsigned lc_new_number;
 };
 
 struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
	struct list_head lru;
	struct list_head free;
	struct list_head in_use;
+	struct list_head to_be_changed;
 
	/* the pre-created kmem cache to allocate the objects from */
	struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@
	size_t element_off;
 
	/* number of elements (indices) */
-	unsigned int  nr_elements;
+	unsigned int nr_elements;
	/* Arbitrary limit on maximum tracked objects. Practical limit is much
	 * lower due to allocation failures, probably. For typical use cases,
	 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@
	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE (1<<24)
 
+	/* allow to accumulate a few (index:label) changes,
+	 * but no more than max_pending_changes */
+	unsigned int max_pending_changes;
+	/* number of elements currently on to_be_changed list */
+	unsigned int pending_changes;
+
	/* statistics */
-	unsigned used; /* number of lelements currently on in_use list */
-	unsigned long hits, misses, starving, dirty, changed;
+	unsigned used; /* number of elements currently on in_use list */
+	unsigned long hits, misses, starving, locked, changed;
 
	/* see below: flag-bits for lru_cache */
	unsigned long flags;
 
-	/* when changing the label of an index element */
-	unsigned int new_number;
-
-	/* for paranoia when changing the label of an index element */
-	struct lc_element *changing_element;
 
	void *lc_private;
	const char *name;
@@ -221,10 +225,15 @@ enum {
	/* debugging aid, to catch concurrent access early.
	 * user needs to guarantee exclusive access by proper locking! */
	__LC_PARANOIA,
-	/* if we need to change the set, but currently there is a changing
-	 * transaction pending, we are "dirty", and must deferr further
-	 * changing requests */
+
+	/* annotate that the set is "dirty", possibly accumulating further
+	 * changes, until a transaction is finally triggered */
	__LC_DIRTY,
+
+	/* Locked, no further changes allowed.
+	 * Also used to serialize changing transactions. */
+	__LC_LOCKED,
+
	/* if we need to change the set, but currently there is no free nor
	 * unused element available, we are "starving", and must not give out
	 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@
 };
 #define LC_PARANOIA (1<<__LC_PARANOIA)
 #define LC_DIRTY (1<<__LC_DIRTY)
+#define LC_LOCKED (1<<__LC_LOCKED)
 #define LC_STARVING (1<<__LC_STARVING)
 
 extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
		unsigned e_count, size_t e_size, size_t e_off);
 extern void lc_reset(struct lru_cache *lc);
 extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
 extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
 extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
		void (*detail) (struct seq_file *, struct lc_element *));
 
 /**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
  * @lc: the lru cache to operate on
  *
- * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we acquired the lock.
+ * Allows (expects) the set to be "dirty".  Note that the reference counts and
+ * order on the active and lru lists may still change.  Used to serialize
+ * changing transactions.  Returns true if we aquired the lock.
  */
-static inline int lc_try_lock(struct lru_cache *lc)
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
 {
-	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+	return !test_and_set_bit(__LC_LOCKED, &lc->flags);
 }
 
 /**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change.  Only works on a "clean" set.  Returns true if we aquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
+ */
+extern int lc_try_lock(struct lru_cache *lc);
+
+/**
  * lc_unlock - unlock @lc, allow lc_get() to change the set again
  * @lc: the lru cache to operate on
  */
 static inline void lc_unlock(struct lru_cache *lc)
 {
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	clear_bit(__LC_DIRTY, &lc->flags);
+	clear_bit_unlock(__LC_LOCKED, &lc->flags);
 }
 
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e = lc_find(lc, enr);
-	return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
 
 #define lc_entry(ptr, type, member) \
	container_of(ptr, type, member)
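
The header now spells out a two-flag protocol: lc_get() raises LC_DIRTY while
it accumulates pending changes, and a writer later takes LC_LOCKED for the
actual transaction; lc_unlock() clears both. A minimal sketch of the intended
sequence, with write_transaction() as a hypothetical stand-in for whatever
the user persists:

	/* user-provided, not part of this API */
	extern void write_transaction(struct lru_cache *lc);

	static int my_commit_pending(struct lru_cache *lc)
	{
		/* serialize writers; the set is allowed to be "dirty" */
		if (!lc_try_lock_for_transaction(lc))
			return -EBUSY;

		/* persist the queued (index: old label -> new label)
		 * assignments accumulated on the to_be_changed list */
		write_transaction(lc);

		lc_committed(lc);	/* move to_be_changed to in_use */
		lc_unlock(lc);		/* clear LC_DIRTY, then LC_LOCKED */
		return 0;
	}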
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 17621684758a..d71d89498943 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
		BUG_ON(i >= lc_->nr_elements); \
		BUG_ON(lc_->lc_element[i] != e_); } while (0)
 
+
+/* We need to atomically
+ *  - try to grab the lock (set LC_LOCKED)
+ *  - only if there is no pending transaction
+ *    (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+	unsigned long val;
+	do {
+		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+	} while (unlikely (val == LC_PARANOIA));
+	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+	return 0 == val;
+#if 0
+	/* Alternative approach, spin in case someone enters or leaves a
+	 * PARANOIA_ENTRY()/RETURN() section. */
+	unsigned long old, new, val;
+	do {
+		old = lc->flags & LC_PARANOIA;
+		new = old | LC_LOCKED;
+		val = cmpxchg(&lc->flags, old, new);
+	} while (unlikely (val == (old ^ LC_PARANOIA)));
+	return old == val;
+#endif
+}
+
 /**
  * lc_create - prepares to track objects in an active set
  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
  * @e_count: number of elements allowed to be active simultaneously
  * @e_size: size of the tracked objects
  * @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
  * or NULL on (allocation) failure.
  */
 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
		unsigned e_count, size_t e_size, size_t e_off)
 {
	struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 
	lc->name = name;
	lc->element_size = e_size;
	lc->element_off = e_off;
	lc->nr_elements = e_count;
-	lc->new_number = LC_FREE;
+	lc->max_pending_changes = max_pending_changes;
	lc->lc_cache = cache;
	lc->lc_element = element;
	lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
		e = p + e_off;
		e->lc_index = i;
		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
		list_add(&e->list, &lc->free);
		element[i] = e;
	}
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
	INIT_LIST_HEAD(&lc->in_use);
	INIT_LIST_HEAD(&lc->lru);
	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
	lc->used = 0;
	lc->hits = 0;
	lc->misses = 0;
	lc->starving = 0;
-	lc->dirty = 0;
+	lc->locked = 0;
	lc->changed = 0;
+	lc->pending_changes = 0;
	lc->flags = 0;
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
 
	for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
		/* re-init it */
		e->lc_index = i;
		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
		list_add(&e->list, &lc->free);
	}
 }
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
	/* NOTE:
	 * total calls to lc_get are
	 * (starving + hits + misses)
-	 * misses include "dirty" count (update from an other thread in
+	 * misses include "locked" count (update from an other thread in
	 * progress) and "changed", when this in fact lead to an successful
	 * update of the cache.
	 */
	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
 }
 
 
-/**
- * lc_find - find element by label, if present in the hash table
- * @lc: The lru_cache object
- * @enr: element number
- *
- * Returns the pointer to an element, if the element with the requested
- * "label" or element number is present in the hash table,
- * or NULL if not found. Does not change the refcnt.
- */
-struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+		bool include_changing)
 {
	struct hlist_node *n;
	struct lc_element *e;
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
	BUG_ON(!lc);
	BUG_ON(!lc->nr_elements);
	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
-		if (e->lc_number == enr)
+		/* "about to be changed" elements, pending transaction commit,
+		 * are hashed by their "new number". "Normal" elements have
+		 * lc_number == lc_new_number. */
+		if (e->lc_new_number != enr)
+			continue;
+		if (e->lc_new_number == e->lc_number || include_changing)
			return e;
+		break;
	}
	return NULL;
 }
 
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 {
-	struct list_head *n;
-	struct lc_element *e;
-
-	if (list_empty(&lc->lru))
-		return NULL;
-
-	n = lc->lru.prev;
-	e = list_entry(n, struct lc_element, list);
-
-	PARANOIA_LC_ELEMENT(lc, e);
+	return __lc_find(lc, enr, 0);
+}
 
-	list_del(&e->list);
-	hlist_del(&e->colision);
-	return e;
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = __lc_find(lc, enr, 1);
+	return e && e->refcnt;
 }
 
 /**
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt);
 
-	e->lc_number = LC_FREE;
+	e->lc_number = e->lc_new_number = LC_FREE;
	hlist_del_init(&e->colision);
	list_move(&e->list, &lc->free);
	RETURN();
 }
 
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
 {
	struct list_head *n;
+	struct lc_element *e;
+
+	if (!list_empty(&lc->free))
+		n = lc->free.next;
+	else if (!list_empty(&lc->lru))
+		n = lc->lru.prev;
+	else
+		return NULL;
+
+	e = list_entry(n, struct lc_element, list);
+	PARANOIA_LC_ELEMENT(lc, e);
 
-	if (list_empty(&lc->free))
-		return lc_evict(lc);
+	e->lc_new_number = new_number;
+	if (!hlist_unhashed(&e->colision))
+		__hlist_del(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+	list_move(&e->list, &lc->to_be_changed);
 
-	n = lc->free.next;
-	list_del(n);
-	return list_entry(n, struct lc_element, list);
+	return e;
 }
 
 static int lc_unused_element_available(struct lru_cache *lc)
@@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
		RETURN(NULL);
	}
 
-	e = lc_find(lc, enr);
-	if (e) {
+	e = __lc_find(lc, enr, 1);
+	/* if lc_new_number != lc_number,
+	 * this enr is currently being pulled in already,
+	 * and will be available once the pending transaction
+	 * has been committed. */
+	if (e && e->lc_new_number == e->lc_number) {
		++lc->hits;
		if (e->refcnt++ == 0)
			lc->used++;
@@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
	if (!may_change)
		RETURN(NULL);
 
+	/* It has been found above, but on the "to_be_changed" list, not yet
+	 * committed. Don't pull it in twice, wait for the transaction, then
+	 * try again */
+	if (e)
+		RETURN(NULL);
+
+	/* To avoid races with lc_try_lock(), first, mark us dirty
+	 * (using test_and_set_bit, as it implies memory barriers), ... */
+	test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+	/* ... only then check if it is locked anyways. If lc_unlock clears
+	 * the dirty bit again, that's not a problem, we will come here again.
+	 */
+	if (test_bit(__LC_LOCKED, &lc->flags)) {
+		++lc->locked;
+		RETURN(NULL);
+	}
+
	/* In case there is nothing available and we can not kick out
	 * the LRU element, we have to wait ...
	 */
@@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
		RETURN(NULL);
	}
 
-	/* it was not present in the active set.
-	 * we are going to recycle an unused (or even "free") element.
-	 * user may need to commit a transaction to record that change.
-	 * we serialize on flags & LC_DIRTY */
-	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
-		++lc->dirty;
+	/* It was not present in the active set.  We are going to recycle an
+	 * unused (or even "free") element, but we won't accumulate more than
+	 * max_pending_changes changes. */
+	if (lc->pending_changes >= lc->max_pending_changes)
		RETURN(NULL);
-	}
 
-	e = lc_get_unused_element(lc);
+	e = lc_prepare_for_change(lc, enr);
	BUG_ON(!e);
 
	clear_bit(__LC_STARVING, &lc->flags);
	BUG_ON(++e->refcnt != 1);
	lc->used++;
-
-	lc->changing_element = e;
-	lc->new_number = enr;
+	lc->pending_changes++;
 
	RETURN(e);
 }
@@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
  * pointer to an UNUSED element with some different element number,
  * where that different number may also be %LC_FREE.
  *
- * In this case, the cache is marked %LC_DIRTY (blocking further changes),
- * and the returned element pointer is removed from the lru list and
- * hash collision chains.  The user now should do whatever housekeeping
- * is necessary.
- * Then he must call lc_changed(lc,element_pointer), to finish
- * the change.
+ * In this case, the cache is marked %LC_DIRTY,
+ * so lc_try_lock() will no longer succeed.
+ * The returned element pointer is moved to the "to_be_changed" list,
+ * and registered with the new element number on the hash collision chains,
+ * so it is possible to pick it up from lc_is_used().
+ * Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ * The user now should do whatever housekeeping is necessary,
+ * typically serialize on lc_try_lock_for_transaction(), then call
+ * lc_committed(lc) and lc_unlock(), to finish the change.
  *
  * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
  * any cache set change.
@@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
 }
 
 /**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_committed - tell @lc that pending changes have been recorded
  * @lc: the lru cache to operate on
- * @e: the element pending label change
+ *
+ * User is expected to serialize on explicit lc_try_lock_for_transaction()
+ * before the transaction is started, and later needs to lc_unlock() explicitly
+ * as well.
  */
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+void lc_committed(struct lru_cache *lc)
 {
+	struct lc_element *e, *tmp;
+
	PARANOIA_ENTRY();
-	BUG_ON(e != lc->changing_element);
-	PARANOIA_LC_ELEMENT(lc, e);
-	++lc->changed;
-	e->lc_number = lc->new_number;
-	list_add(&e->list, &lc->in_use);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+		/* count number of changes, not number of transactions */
+		++lc->changed;
+		e->lc_number = e->lc_new_number;
+		list_move(&e->list, &lc->in_use);
+	}
+	lc->pending_changes = 0;
	RETURN();
 }
 
@@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt == 0);
-	BUG_ON(e == lc->changing_element);
+	BUG_ON(e->lc_number != e->lc_new_number);
	if (--e->refcnt == 0) {
		/* move it to the front of LRU. */
		list_move(&e->list, &lc->lru);
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
 void lc_set(struct lru_cache *lc, unsigned int enr, int index)
 {
	struct lc_element *e;
+	struct list_head *lh;
 
	if (index < 0 || index >= lc->nr_elements)
		return;
 
	e = lc_element_by_index(lc, index);
-	e->lc_number = enr;
+	BUG_ON(e->lc_number != e->lc_new_number);
+	BUG_ON(e->refcnt != 0);
 
+	e->lc_number = e->lc_new_number = enr;
	hlist_del_init(&e->colision);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+	if (enr == LC_FREE)
+		lh = &lc->free;
+	else {
+		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+		lh = &lc->lru;
+	}
+	list_move(&e->list, lh);
 }
 
 /**
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
 EXPORT_SYMBOL(lc_find);
 EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
 EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
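
Taken together, an element now moves free/lru -> to_be_changed -> in_use ->
lru over its lifetime. A single-threaded walk-through of the new semantics
(a sketch only, derived from the code above; real callers wrap each call in
their own lock, as the DRBD code does with al_lock):

	static void walkthrough(struct lru_cache *lc)
	{
		struct lc_element *e;

		e = lc_get(lc, 42);
		if (!e)			/* locked, starving, or budget full */
			return;
		/* miss: e was taken from the free or lru list, hashed under
		 * 42, parked on to_be_changed; e->lc_new_number == 42 while
		 * e->lc_number still holds the old label */

		lc_is_used(lc, 42);	/* true: sees the pending change */
		lc_find(lc, 42);	/* NULL: ignores pending changes */
		lc_get(lc, 42);		/* NULL: same enr not pulled in twice */

		if (lc_try_lock_for_transaction(lc)) {
			lc_committed(lc); /* e->lc_number = 42, e on in_use */
			lc_unlock(lc);
		}

		lc_find(lc, 42);	/* now returns e */
		lc_put(lc, e);		/* refcnt drops to 0: e moves to lru */
	}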