diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/raid56.c | 279 | 
1 files changed, 193 insertions, 86 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index e14968a37c08..fd67ba4648c9 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1014,6 +1014,36 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)  }  /* + * Return the total numer of errors found in the vertical stripe of @sector_nr. + * + * @faila and @failb will also be updated to the first and second stripe + * number of the errors. + */ +static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr, +				     int *faila, int *failb) +{ +	int stripe_nr; +	int found_errors = 0; + +	ASSERT(faila && failb); +	*faila = -1; +	*failb = -1; + +	for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { +		int total_sector_nr = stripe_nr * rbio->stripe_nsectors + sector_nr; + +		if (test_bit(total_sector_nr, rbio->error_bitmap)) { +			found_errors++; +			if (*faila < 0) +				*faila = stripe_nr; +			else if (*failb < 0) +				*failb = stripe_nr; +		} +	} +	return found_errors; +} + +/*   * Add a single sector @sector into our list of bios for IO.   *   * Return 0 if everything went well. @@ -1740,14 +1770,15 @@ fail:   * @*pointers are the pre-allocated pointers by the caller, so we don't   * need to allocate/free the pointers again and again.   */ -static void recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, -			     void **pointers, void **unmap_array) +static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, +			    void **pointers, void **unmap_array)  {  	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;  	struct sector_ptr *sector;  	const u32 sectorsize = fs_info->sectorsize; -	const int faila = rbio->faila; -	const int failb = rbio->failb; +	int found_errors; +	int faila; +	int failb;  	int stripe_nr;  	/* @@ -1756,7 +1787,19 @@ static void recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,  	 */  	if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&  	    !test_bit(sector_nr, &rbio->dbitmap)) -		return; +		return 0; + +	found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, +						 &failb); +	/* +	 * No errors in the veritical stripe, skip it.  Can happen for recovery +	 * which only part of a stripe failed csum check. +	 */ +	if (!found_errors) +		return 0; + +	if (found_errors > rbio->bioc->max_errors) +		return -EIO;  	/*  	 * Setup our array of pointers with sectors from each stripe @@ -1766,12 +1809,11 @@ static void recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,  	 */  	for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {  		/* -		 * If we're rebuilding a read, we have to use -		 * pages from the bio list +		 * If we're rebuilding a read, we have to use pages from the +		 * bio list if possible.  		 */  		if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || -		     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && -		    (stripe_nr == faila || stripe_nr == failb)) { +		     rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {  			sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);  		} else {  			sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); @@ -1859,18 +1901,19 @@ pstripe:  	 * Especially if we determine to cache the rbio, we need to  	 * have at least all data sectors uptodate.  	 */ -	if (rbio->faila >= 0) { -		sector = rbio_stripe_sector(rbio, rbio->faila, sector_nr); +	if (faila >= 0) { +		sector = rbio_stripe_sector(rbio, faila, sector_nr);  		sector->uptodate = 1;  	} -	if (rbio->failb >= 0) { -		sector = rbio_stripe_sector(rbio, rbio->failb, sector_nr); +	if (failb >= 0) { +		sector = rbio_stripe_sector(rbio, failb, sector_nr);  		sector->uptodate = 1;  	}  cleanup:  	for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)  		kunmap_local(unmap_array[stripe_nr]); +	return 0;  }  static int recover_sectors(struct btrfs_raid_bio *rbio) @@ -1893,10 +1936,6 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)  		goto out;  	} -	/* Make sure faila and fail b are in order. */ -	if (rbio->faila >= 0 && rbio->failb >= 0 && rbio->faila > rbio->failb) -		swap(rbio->faila, rbio->failb); -  	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||  	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {  		spin_lock_irq(&rbio->bio_list_lock); @@ -1906,8 +1945,11 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)  	index_rbio_pages(rbio); -	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) -		recover_vertical(rbio, sectornr, pointers, unmap_array); +	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { +		ret = recover_vertical(rbio, sectornr, pointers, unmap_array); +		if (ret < 0) +			break; +	}  out:  	kfree(pointers); @@ -1937,13 +1979,21 @@ static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,  		int sectornr = total_sector_nr % rbio->stripe_nsectors;  		struct sector_ptr *sector; -		if (rbio->faila == stripe || rbio->failb == stripe) { -			/* Skip the current stripe. */ -			ASSERT(sectornr == 0); -			total_sector_nr += rbio->stripe_nsectors - 1; -			atomic_inc(&rbio->error); +		/* +		 * Skip the range which has error.  It can be a range which is +		 * marked error (for csum mismatch), or it can be a missing +		 * device. +		 */ +		if (!rbio->bioc->stripes[stripe].dev->bdev || +		    test_bit(total_sector_nr, rbio->error_bitmap)) { +			/* +			 * Also set the error bit for missing device, which +			 * may not yet have its error bit set. +			 */ +			set_bit(total_sector_nr, rbio->error_bitmap);  			continue;  		} +  		sector = rbio_stripe_sector(rbio, stripe, sectornr);  		ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,  					 sectornr, REQ_OP_READ); @@ -1966,9 +2016,8 @@ static int recover_rbio(struct btrfs_raid_bio *rbio)  	/*  	 * Either we're doing recover for a read failure or degraded write, -	 * caller should have set faila/b and error bitmap correctly. +	 * caller should have set error bitmap correctly.  	 */ -	ASSERT(rbio->faila >= 0 || rbio->failb >= 0);  	ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));  	bio_list_init(&bio_list); @@ -1992,12 +2041,6 @@ static int recover_rbio(struct btrfs_raid_bio *rbio)  	submit_read_bios(rbio, &bio_list);  	wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); -	/* We have more errors than our tolerance during the read. */ -	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) { -		ret = -EIO; -		goto out; -	} -  	ret = recover_sectors(rbio);  out: @@ -2032,6 +2075,51 @@ static void recover_rbio_work_locked(struct work_struct *work)  	rbio_orig_end_io(rbio, errno_to_blk_status(ret));  } +static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num) +{ +	bool found = false; +	int sector_nr; + +	/* +	 * This is for RAID6 extra recovery tries, thus mirror number should +	 * be large than 2. +	 * Mirror 1 means read from data stripes. Mirror 2 means rebuild using +	 * RAID5 methods. +	 */ +	ASSERT(mirror_num > 2); +	for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { +		int found_errors; +		int faila; +		int failb; + +		found_errors = get_rbio_veritical_errors(rbio, sector_nr, +							 &faila, &failb); +		/* This vertical stripe doesn't have errors. */ +		if (!found_errors) +			continue; + +		/* +		 * If we found errors, there should be only one error marked +		 * by previous set_rbio_range_error(). +		 */ +		ASSERT(found_errors == 1); +		found = true; + +		/* Now select another stripe to mark as error. */ +		failb = rbio->real_stripes - (mirror_num - 1); +		if (failb <= faila) +			failb--; + +		/* Set the extra bit in error bitmap. */ +		if (failb >= 0) +			set_bit(failb * rbio->stripe_nsectors + sector_nr, +				rbio->error_bitmap); +	} + +	/* We should found at least one vertical stripe with error.*/ +	ASSERT(found); +} +  /*   * the main entry point for reads from the higher layers.  This   * is really only called when the normal read path had a failure, @@ -2074,11 +2162,7 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,  	 * for 'mirror_num > 2', select a stripe to fail on every retry.  	 */  	if (mirror_num > 2) { -		/* -		 * 'mirror == 3' is to fail the p stripe and -		 * reconstruct from the q stripe.  'mirror > 3' is to -		 * fail a data stripe and reconstruct from p+q stripe. -		 */ +		set_rbio_raid6_extra_error(rbio, mirror_num);  		rbio->failb = rbio->real_stripes - (mirror_num - 1);  		ASSERT(rbio->failb > 0);  		if (rbio->failb <= rbio->faila) @@ -2507,48 +2591,85 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)  static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)  { -	int dfail = 0, failp = -1; +	void **pointers = NULL; +	void **unmap_array = NULL; +	int sector_nr;  	int ret; -	/* No error case should be already handled by the caller. */ -	ASSERT(rbio->faila >= 0 || rbio->failb >= 0); +	/* +	 * @pointers array stores the pointer for each sector. +	 * +	 * @unmap_array stores copy of pointers that does not get reordered +	 * during reconstruction so that kunmap_local works. +	 */ +	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); +	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); +	if (!pointers || !unmap_array) { +		ret = -ENOMEM; +		goto out; +	} -	if (is_data_stripe(rbio, rbio->faila)) -		dfail++; -	else if (is_parity_stripe(rbio->faila)) -		failp = rbio->faila; +	for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { +		int dfail = 0, failp = -1; +		int faila; +		int failb; +		int found_errors; -	if (is_data_stripe(rbio, rbio->failb)) -		dfail++; -	else if (is_parity_stripe(rbio->failb)) -		failp = rbio->failb; +		found_errors = get_rbio_veritical_errors(rbio, sector_nr, +							 &faila, &failb); +		if (found_errors > rbio->bioc->max_errors) { +			ret = -EIO; +			goto out; +		} +		if (found_errors == 0) +			continue; -	/* -	 * Because we can not use a scrubbing parity to repair -	 * the data, so the capability of the repair is declined. -	 * (In the case of RAID5, we can not repair anything) -	 */ -	if (dfail > rbio->bioc->max_errors - 1) -		return -EIO; +		/* We should have at least one error here. */ +		ASSERT(faila >= 0 || failb >= 0); -	/* -	 * If all data is good, only parity is correctly, just -	 * repair the parity. -	 */ -	if (dfail == 0) -		return 0; +		if (is_data_stripe(rbio, faila)) +			dfail++; +		else if (is_parity_stripe(faila)) +			failp = faila; -	/* -	 * Here means we got one corrupted data stripe and one -	 * corrupted parity on RAID6, if the corrupted parity -	 * is scrubbing parity, luckily, use the other one to repair -	 * the data, or we can not repair the data stripe. -	 */ -	if (failp != rbio->scrubp) -		return -EIO; +		if (is_data_stripe(rbio, failb)) +			dfail++; +		else if (is_parity_stripe(failb)) +			failp = failb; +		/* +		 * Because we can not use a scrubbing parity to repair the +		 * data, so the capability of the repair is declined.  (In the +		 * case of RAID5, we can not repair anything.) +		 */ +		if (dfail > rbio->bioc->max_errors - 1) { +			ret = -EIO; +			goto out; +		} +		/* +		 * If all data is good, only parity is correctly, just repair +		 * the parity, no need to recover data stripes. +		 */ +		if (dfail == 0) +			continue; -	/* We have some corrupted sectors, need to repair them. */ -	ret = recover_sectors(rbio); +		/* +		 * Here means we got one corrupted data stripe and one +		 * corrupted parity on RAID6, if the corrupted parity is +		 * scrubbing parity, luckily, use the other one to repair the +		 * data, or we can not repair the data stripe. +		 */ +		if (failp != rbio->scrubp) { +			ret = -EIO; +			goto out; +		} + +		ret = recover_vertical(rbio, sector_nr, pointers, unmap_array); +		if (ret < 0) +			goto out; +	} +out: +	kfree(pointers); +	kfree(unmap_array);  	return ret;  } @@ -2624,25 +2745,11 @@ static int scrub_rbio(struct btrfs_raid_bio *rbio)  	submit_read_bios(rbio, &bio_list);  	wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); -	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) { -		ret = -EIO; -		goto cleanup; -	} -	/* -	 * No error during read, can finish the scrub and need to verify the -	 * P/Q sectors; -	 */ -	if (atomic_read(&rbio->error) == 0) { -		need_check = true; -		goto finish; -	} - -	/* We have some failures, need to recover the failed sectors first. */ +	/* We may have some failures, recover the failed sectors first. */  	ret = recover_scrub_rbio(rbio);  	if (ret < 0)  		goto cleanup; -finish:  	/*  	 * We have every sector properly prepared. Can finish the scrub  	 * and writeback the good content.  | 
