[DRBD-user] 2.6.14-1.1656_FC4 kernel with drbd hangs

Todd Denniston Todd.Denniston at ssa.crane.navy.mil
Mon Jan 30 15:57:59 CET 2006

Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.


Philipp Reisner wrote:
> Am Dienstag, 24. Januar 2006 02:38 schrieb Todd Denniston:
> 
>>I have found a way to consistently cause the hang with 2.6.14-1.1656_FC4 &
>>drbd, and I believe I have enough results to say the fault is related to
>>Neil Brown's "reduce stack consumption" patch.  
<SNIP>
> Could you please provide feedback, if this patch (applies to drbd-0.7.15)
> fixes the problem for you ?
> 

My script can no longer cause a lockup once I apply the patch to 0.7.15, using 
2.6.14-1.1656_FC4smp or 2.6.14-1.1656_FC4; even repeated runs don't bring it down.

From this I would mark the patch as good.

Thanks.
-Todd

> -Philipp
> 
> 
> ------------------------------------------------------------------------
> 
> Index: drbd/drbd_actlog.c
> ===================================================================
> --- drbd/drbd_actlog.c	(revision 2054)
> +++ drbd/drbd_actlog.c	(working copy)
> @@ -191,8 +191,15 @@
>  	unsigned int enr;
>  };
>  
> -STATIC void drbd_al_write_transaction(struct Drbd_Conf *,struct lc_element *,
> -				      unsigned int );
> +struct update_al_work {
> +	struct drbd_work w;
> +	struct lc_element * al_ext;
> +	struct completion event;
> +	unsigned int enr;
> +};
> +
> +STATIC int w_al_write_transaction(struct Drbd_Conf *, struct drbd_work *, int);
> +
>  static inline
>  struct lc_element* _al_get(struct Drbd_Conf *mdev, unsigned int enr)
>  {
> @@ -229,6 +236,7 @@
>  {
>  	unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9));
>  	struct lc_element *al_ext;
> +	struct update_al_work al_work;
>  
>  	D_ASSERT(atomic_read(&mdev->local_cnt)>0);
>  	wait_event(mdev->al_wait, (al_ext = _al_get(mdev,enr)) );
> @@ -242,7 +250,19 @@
>  		if(mdev->cstate < Connected && evicted != LC_FREE ) {
>  			drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT );
>  		}
> -		drbd_al_write_transaction(mdev,al_ext,enr);
> +
> +		/* drbd_al_write_transaction(mdev,al_ext,enr);
> +		   generic_make_request() are serialized on the 
> +		   current->bio_tail list now. Therefore we have
> +		   to delegate writing something to AL to the
> +		   worker thread. */
> +		init_completion(&al_work.event);
> +		al_work.al_ext = al_ext;
> +		al_work.enr = enr;
> +		al_work.w.cb = w_al_write_transaction;
> +		drbd_queue_work_front(mdev,&mdev->data.work,&al_work.w);
> +		wait_for_completion(&al_work.event);
> +		
>  		mdev->al_writ_cnt++;
>  
>  		/*
> @@ -279,9 +299,8 @@
>  	spin_unlock_irqrestore(&mdev->al_lock,flags);
>  }
>  
> -STATIC void
> -drbd_al_write_transaction(struct Drbd_Conf *mdev,struct lc_element *updated,
> -			  unsigned int new_enr)
> +STATIC int
> +w_al_write_transaction(struct Drbd_Conf *mdev, struct drbd_work *w, int unused)
>  {
>  	int i,n,mx;
>  	unsigned int extent_nr;
> @@ -289,6 +308,9 @@
>  	sector_t sector;
>  	u32 xor_sum=0;
>  
> +	struct lc_element *updated = ((struct update_al_work*)w)->al_ext;
> +	unsigned int new_enr = ((struct update_al_work*)w)->enr;
> +
>  	down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
>  	buffer = (struct al_transaction*)page_address(mdev->md_io_page);
>  
> @@ -340,6 +362,10 @@
>  	mdev->al_tr_number++;
>  
>  	up(&mdev->md_io_mutex);
> +
> +	complete(&((struct update_al_work*)w)->event);
> +
> +	return 1;
>  }
>  
>  STATIC int drbd_al_read_tr(struct Drbd_Conf *mdev,
> 
> 
> ------------------------------------------------------------------------
> 



More information about the drbd-user mailing list