[DRBD-cvs] drbd by phil; Filly fixed the "lc_get() failed!" Bug. ...
drbd-user@lists.linbit.com
drbd-user@lists.linbit.com
Mon, 3 May 2004 17:00:38 +0200 (CEST)
DRBD CVS committal
Author : phil
Module : drbd
Dir : drbd/drbd
Modified Files:
Tag: rel-0_7-branch
drbd_actlog.c lru_cache.c lru_cache.h
Log Message:
Filly fixed the "lc_get() failed!" Bug. What I did not see for all
the time is:
We have a ussage pattern on an LRU cache where some elements have
a high reference count, but are otherwise idle.
A lot of other elements have a reference count of 0 but, and get'ed
and put'ed very frequently
-=>
The elements with the low reference count (actually 0 most of the time)
move to the front of the LRU list. The idle elements move to the back
of the LRU list.
Now, we have a third list: in_use
All elements with positive reference counts are on the in_use list.
All elements with a reference count of zero are on the LRU list, and may
be evicted.
Initially all elements are on the 'free' list. (And if something is
forced out with lc_del() it also goes to the 'free' list.)
No more "lc_get() failed!" messages!!! Yes!! Yes!
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.94
retrieving revision 1.1.2.95
diff -u -3 -r1.1.2.94 -r1.1.2.95
--- drbd_actlog.c 29 Apr 2004 14:43:26 -0000 1.1.2.94
+++ drbd_actlog.c 3 May 2004 15:00:33 -0000 1.1.2.95
@@ -640,11 +640,6 @@
// a 16 MB extent border. (Currently this is true...)
enr = (sector >> (BM_EXTENT_SIZE_B-9));
- /*
- INFO("%s [%d]:%s(,%ld,%d)\n",
- current->comm, current->pid, __func__,
- sector, cleared);
- */
spin_lock_irqsave(&mdev->al_lock,flags);
ext = (struct bm_extent *) lc_get(mdev->resync,enr);
if (ext) {
@@ -656,24 +651,19 @@
// This element should be in the cache
// since drbd_rs_begin_io() pulled it already in.
ext->rs_left = bm_count_sectors(mdev->mbds_id,enr);
- //DUMPI(ext->lce.lc_number);
- //DUMPI(mdev->resync->new_number);
lc_changed(mdev->resync,&ext->lce);
}
lc_put(mdev->resync,&ext->lce);
} else {
- ERR("lc_get() failed! Probabely something stays"
- " dirty in the on disk BM. (resync LRU too small) \n");
- ERR("resync_locked=%d nr_elements=%d\n",
- atomic_read(&mdev->resync_locked),
- mdev->resync->nr_elements);
- ERR("flags=%lx\n",mdev->resync->flags);
+ ERR("lc_get() failed! locked=%d/%d flags=%lu\n",
+ atomic_read(&mdev->resync_locked),
+ mdev->resync->nr_elements,
+ mdev->resync->flags);
}
list_for_each_safe(le,tmp,&mdev->resync->lru) {
ext=(struct bm_extent *)list_entry(le,struct lc_element,list);
if(ext->rs_left == 0) {
- ERR_IF(ext->lce.refcnt) continue;
udw=kmalloc(sizeof(*udw),GFP_ATOMIC);
if(!udw) {
WARN("Could not kmalloc an udw\n");
@@ -723,7 +713,7 @@
unsigned long rs_flags;
if(atomic_read(&mdev->resync_locked) > mdev->resync->nr_elements-3 ) {
- ERR("bme_get() does not lock all elements\n");
+ //WARN("bme_get() does not lock all elements\n");
return 0;
}
@@ -731,11 +721,8 @@
bm_ext = (struct bm_extent*) lc_get(mdev->resync,enr);
if (bm_ext) {
if(bm_ext->lce.lc_number != enr) {
+ atomic_inc(&mdev->resync_locked);
bm_ext->rs_left = bm_count_sectors(mdev->mbds_id,enr);
- /*
- DUMPI(bm_ext->lce.lc_number);
- DUMPI(mdev->resync->new_number);
- */
lc_changed(mdev->resync,(struct lc_element*)bm_ext);
wake_up(&mdev->al_wait);
}
@@ -795,8 +782,6 @@
wait_event(mdev->al_wait, (bm_ext = _bme_get(mdev,enr)) );
if(test_bit(BME_LOCKED,&bm_ext->flags)) return;
-
- atomic_inc(&mdev->resync_locked);
for(i=0;i<SM;i++) {
wait_event(mdev->al_wait, !_is_in_al(mdev,enr*SM+i) );
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/lru_cache.c,v
retrieving revision 1.1.2.24
retrieving revision 1.1.2.25
diff -u -3 -r1.1.2.24 -r1.1.2.25
--- lru_cache.c 7 Mar 2004 20:31:31 -0000 1.1.2.24
+++ lru_cache.c 3 May 2004 15:00:33 -0000 1.1.2.25
@@ -36,12 +36,6 @@
#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA,&lc->flags); smp_mb__after_clear_bit(); } while (0)
#define RETURN(x...) do { PARANOIA_LEAVE(); return x ; } while (0)
-static inline void lc_touch(struct lru_cache *lc,struct lc_element *e)
-{
- // XXX paranoia: !list_empty(lru) && list_empty(free)
- list_move(&e->list,&lc->lru);
-}
-
/**
* lc_alloc: allocates memory for @e_count objects of @e_size bytes plus the
* struct lru_cache, and the hash table slots.
@@ -62,6 +56,7 @@
lc = vmalloc(bytes);
memset(lc, 0, bytes);
if (lc) {
+ INIT_LIST_HEAD(&lc->in_use);
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
lc->element_size = e_size;
@@ -117,11 +112,11 @@
struct list_head *n;
struct lc_element *e;
+ if (list_empty(&lc->lru)) return 0;
+
n=lc->lru.prev;
e=list_entry(n, struct lc_element,list);
- if (e->refcnt) return NULL; // Dead code ?
-
list_del(&e->list);
hlist_del(&e->colision);
return e;
@@ -160,15 +155,10 @@
STATIC int lc_unused_element_available(struct lru_cache* lc)
{
- struct list_head *n;
- struct lc_element *e;
-
if (!list_empty(&lc->free)) return 1; // something on the free list
- n=lc->lru.prev;
- e=list_entry(n, struct lc_element,list);
+ if (!list_empty(&lc->lru)) return 1; // something to evict
- if (e->refcnt) return 0; // the LRU element is still in use
- return 1; // we can evict the LRU element
+ return 0;
}
@@ -212,7 +202,7 @@
e = lc_find(lc, enr);
if (e) {
++e->refcnt;
- lc_touch(lc,e);
+ list_move(&e->list,&lc->in_use); // Not evictable...
RETURN(e);
}
@@ -247,7 +237,7 @@
PARANOIA_ENTRY();
BUG_ON(e != lc->changing_element);
e->lc_number = lc->new_number;
- list_add(&e->list,&lc->lru);
+ list_add(&e->list,&lc->in_use);
hlist_add_head( &e->colision, lc->slot + lc_hash_fn(lc, lc->new_number) );
lc->changing_element = NULL;
lc->new_number = -1;
@@ -266,8 +256,9 @@
PARANOIA_ENTRY();
BUG_ON(e->refcnt == 0);
if ( --e->refcnt == 0) {
+ list_move(&e->list,&lc->lru); // move it to the front of LRU.
clear_bit(__LC_STARVING,&lc->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_clear_bit();
}
RETURN(e->refcnt);
}
@@ -291,6 +282,6 @@
hlist_del_init(&e->colision);
hlist_add_head( &e->colision, lc->slot + lc_hash_fn(lc,enr) );
- lc_touch(lc,e); // to make sure that his entry is not on the free list.
+ list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
}
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/lru_cache.h,v
retrieving revision 1.1.2.15
retrieving revision 1.1.2.16
diff -u -3 -r1.1.2.15 -r1.1.2.16
--- lru_cache.h 17 Feb 2004 09:10:35 -0000 1.1.2.15
+++ lru_cache.h 3 May 2004 15:00:33 -0000 1.1.2.16
@@ -61,6 +61,7 @@
struct lru_cache {
struct list_head lru;
struct list_head free;
+ struct list_head in_use;
size_t element_size;
unsigned int nr_elements;
unsigned int new_number;