Gitweb:
http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=c194015e07a...
Commit: c194015e07a134ec26551fdd65d3463696136628
Parent: 0c5ff64499bbb510a83afab6a7180185e08060f9
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Thu Jul 19 08:27:18 2012 -0500
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Thu Aug 2 07:42:21 2012 -0500
gfs_fsck: Back-port some fixes from fsck.gfs2
This patch back-ports some patches from fsck.gfs2 to gfs_fsck
to allow gfs_fsck to better detect and fix file systems that have
invalid indirect metadata blocks.
rhbz#807882
---
gfs/gfs_fsck/bio.c | 3 -
gfs/gfs_fsck/metawalk.c | 26 ++++++----
gfs/gfs_fsck/pass1.c | 132 ++++++++++++++++++++++++++++++++++++++++++++--
gfs/gfs_fsck/util.c | 7 +--
4 files changed, 144 insertions(+), 24 deletions(-)
diff --git a/gfs/gfs_fsck/bio.c b/gfs/gfs_fsck/bio.c
index cd6833c..bf743e7 100644
--- a/gfs/gfs_fsck/bio.c
+++ b/gfs/gfs_fsck/bio.c
@@ -134,9 +134,6 @@ int write_buf(struct fsck_sb *sdp, osi_buf_t *bh, int flags){
return -1;
}
- log_debug("Writing to %"PRIu64" - %"PRIu64" %u\n",
- (uint64)(BH_BLKNO(bh) * BH_SIZE(bh)),
- BH_BLKNO(bh), BH_SIZE(bh));
if(do_write(disk_fd, BH_DATA(bh), BH_SIZE(bh))) {
log_err("Unable to write %u bytes to position %"PRIu64"\n",
BH_SIZE(bh), (uint64)(BH_BLKNO(bh) * BH_SIZE(bh)));
diff --git a/gfs/gfs_fsck/metawalk.c b/gfs/gfs_fsck/metawalk.c
index e14d4dc..e485428 100644
--- a/gfs/gfs_fsck/metawalk.c
+++ b/gfs/gfs_fsck/metawalk.c
@@ -664,7 +664,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
osi_list_t *prev_list, *cur_list, *tmp;
int i, head_size;
uint64 *ptr, block;
- int err;
+ int error = 0, err;
if(get_and_read_buf(ip->i_sbd, ip->i_di.di_num.no_addr, &metabh)) {
stack;
@@ -708,13 +708,22 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
pass->private);
if(err < 0) {
stack;
+ error = err;
goto fail;
}
if(err > 0) {
+ if (!error)
+ error = err;
log_debug("Skipping block %"PRIu64
"\n", block);
continue;
}
+ if (check_range(ip->i_sbd, block)) {
+ log_debug("Skipping invalid block "
+ "%lld\n",
+ (unsigned long long)block);
+ continue;
+ }
if(!nbh) {
if(get_and_read_buf(ip->i_sbd, block,
&nbh)) {
@@ -726,7 +735,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
}
}
}
- return 0;
+ return error;
fail:
for (i = 0; i < GFS_MAX_META_HEIGHT; i++) {
@@ -739,9 +748,8 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
relse_buf(ip->i_sbd, bh);
}
}
- /* This is an error path, so we need to release the buffer here: */
- relse_buf(ip->i_sbd, metabh);
- return -1;
+ /* No need to relse_buf on metabh because it's in the list */
+ return error;
}
/**
@@ -769,9 +777,10 @@ int check_metatree(struct fsck_inode *ip, struct metawalk_fxns
*pass)
osi_list_init(&metalist[i]);
/* create metalist for each level */
- if(build_metalist(ip, &metalist[0], pass)){
+ error = build_metalist(ip, &metalist[0], pass);
+ if (error) {
stack;
- return -1;
+ return error;
}
/* We don't need to record directory blocks - they will be
@@ -1072,9 +1081,6 @@ int delete_blocks(struct fsck_inode *ip, uint64_t block, osi_buf_t
**bh,
if (block_check(ip->i_sbd->bl, block, &q))
return 0;
if (!q.dup_block) {
- log_info("Deleting %s block %lld as part "
- "of inode %lld \n",
- btype, block, ip->i_di.di_num.no_addr);
block_set(ip->i_sbd->bl, block, block_free);
free_block(ip->i_sbd, block);
}
diff --git a/gfs/gfs_fsck/pass1.c b/gfs/gfs_fsck/pass1.c
index 4b9de02..a5cac7b 100644
--- a/gfs/gfs_fsck/pass1.c
+++ b/gfs/gfs_fsck/pass1.c
@@ -35,6 +35,8 @@
#include "link.h"
#include "metawalk.h"
+#define BAD_POINTER_TOLERANCE 10
+
struct block_count {
uint64_t indir_count;
uint64_t data_count;
@@ -101,6 +103,8 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
*bh = NULL;
+ if (block == 135116188)
+ printf("here.");
if (check_range(ip->i_sbd, block)){ /* blk outside of FS */
block_set(sdp->bl, ip->i_di.di_num.no_addr, bad_block);
log_debug("Bad indirect block pointer (out of range).\n");
@@ -120,8 +124,11 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
get_and_read_buf(ip->i_sbd, block, &nbh);
if (check_meta(nbh, GFS_METATYPE_IN)){
- log_debug("Bad indirect block pointer (points to "
- "something that is not an indirect block).\n");
+ log_err("Inode %lld has a bad indirect block pointer %lld "
+ "(points to something that is not an "
+ "indirect block).\n",
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)block);
if(!found_dup) {
block_set(sdp->bl, block, meta_inval);
relse_buf(ip->i_sbd, nbh);
@@ -129,14 +136,19 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
}
relse_buf(ip->i_sbd, nbh);
+ nbh = NULL;
} else /* blk check ok */
*bh = nbh;
- if (!found_dup) {
+ bc->indir_count++;
+ if (found_dup) {
+ if (nbh)
+ relse_buf(ip->i_sbd, nbh);
+ return 1; /* don't process the metadata again */
+ } else {
log_debug("Setting %" PRIu64 " to indirect block.\n", block);
block_set(sdp->bl, block, indir_blk);
}
- bc->indir_count++;
return 0;
}
@@ -734,6 +746,93 @@ int add_to_dir_list(struct fsck_sb *sbp, uint64_t block)
return 0;
}
+/**
+ * Check for massive amounts of pointer corruption. If the block has
+ * lots of out-of-range pointers, we can't trust any of the pointers.
+ * For example, a stray pointer with a value of 0x1d might be
+ * corruption/nonsense, and if so, we don't want to delete an
+ * important file (like master or the root directory) because of it.
+ * We need to check for a large number of bad pointers BEFORE we start
+ * messing with them because we don't want to mark a block as a
+ * duplicate (for example) until we know if the pointers in general can
+ * be trusted. Thus it needs to be in a separate loop.
+ */
+static int rangecheck_block(struct fsck_inode *ip, uint64_t block,
+ osi_buf_t **bh, const char *btype, void *private)
+{
+ long *bad_pointers = (long *)private;
+ struct block_query q = {0};
+
+ if (check_range(ip->i_sbd, block) != 0) {
+ (*bad_pointers)++;
+ log_debug("Bad %s block pointer (out of range #%ld) "
+ "found in inode %lld.\n", btype, *bad_pointers,
+ (unsigned long long)ip->i_di.di_num.no_addr);
+ if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+ return ENOENT;
+ else
+ return -ENOENT; /* Exits check_metatree quicker */
+ }
+ /* See how many duplicate blocks it has */
+ if(block_check(ip->i_sbd->bl, block, &q)) {
+ stack;
+ return -1;
+ }
+ if (q.block_type != block_free) {
+ (*bad_pointers)++;
+ log_debug("Duplicated %s block pointer (violation #%ld) "
+ "found in inode %lld.\n", btype, *bad_pointers,
+ (unsigned long long)ip->i_di.di_num.no_addr);
+ if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+ return ENOENT;
+ else
+ return -ENOENT; /* Exits check_metatree quicker */
+ }
+ return 0;
+}
+
+static int rangecheck_metadata(struct fsck_inode *ip, uint64_t block,
+ osi_buf_t **bh, void *private)
+{
+ return rangecheck_block(ip, block, bh, "metadata", private);
+}
+
+static int rangecheck_leaf(struct fsck_inode *ip, uint64_t block,
+ osi_buf_t *bh, void *private)
+{
+ return rangecheck_block(ip, block, &bh, "leaf", private);
+}
+
+static int rangecheck_data(struct fsck_inode *ip, uint64_t block,
+ struct fsck_rgrp **rgd, void *private)
+{
+ return rangecheck_block(ip, block, NULL, "data", private);
+}
+
+static int rangecheck_eattr_indir(struct fsck_inode *ip, uint64_t block,
+ uint64_t parent, osi_buf_t **bh,
+ void *private)
+{
+ return rangecheck_block(ip, block, NULL, "indirect extended attribute",
+ private);
+}
+
+static int rangecheck_eattr_leaf(struct fsck_inode *ip, uint64_t block,
+ uint64_t parent, osi_buf_t **bh,
+ void *private)
+{
+ return rangecheck_block(ip, block, NULL, "extended attribute",
+ private);
+}
+
+struct metawalk_fxns rangecheck_fxns = {
+ .private = NULL,
+ .check_metalist = rangecheck_metadata,
+ .check_data = rangecheck_data,
+ .check_leaf = rangecheck_leaf,
+ .check_eattr_indir = rangecheck_eattr_indir,
+ .check_eattr_leaf = rangecheck_eattr_leaf,
+};
static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfree)
{
@@ -741,6 +840,7 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t
block, int mfr
struct fsck_inode *ip;
int error;
struct block_count bc = {0};
+ long bad_pointers = 0L;
struct metawalk_fxns invalidate_metatree = {0};
invalidate_metatree.check_metalist = clear_metalist;
invalidate_metatree.check_data = clear_data;
@@ -823,6 +923,25 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t
block, int mfr
goto success;
}
+
+ /* First, check the metadata for massive amounts of pointer corruption.
+ Such corruption can only lead us to ruin trying to clean it up,
+ so it's better to check it up front and delete the inode if
+ there is corruption. */
+ rangecheck_fxns.private = &bad_pointers;
+ error = check_metatree(ip, &rangecheck_fxns);
+ if (bad_pointers > BAD_POINTER_TOLERANCE) {
+ log_err("Error: inode %llu has more than %d bad pointers.\n",
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ BAD_POINTER_TOLERANCE);
+ if (block_set(sdp->bl, block, meta_inval)) {
+ stack;
+ goto fail;
+ }
+ fs_set_bitmap(sdp, block, GFS_BLKST_FREE);
+ goto success;
+ }
+
switch(ip->i_di.di_type) {
case GFS_FILE_DIR:
@@ -930,8 +1049,9 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t
block, int mfr
return 0;
}
if(error > 0) {
- log_warn("Marking inode %lld invalid\n",
- ip->i_di.di_num.no_addr);
+ log_err("Error: inode %llu has unrecoverable errors; "
+ "invalidating.\n",
+ (unsigned long long)ip->i_di.di_num.no_addr);
/* FIXME: Must set all leaves invalid as well */
check_metatree(ip, &invalidate_metatree);
block_set(ip->i_sbd->bl, ip->i_di.di_num.no_addr, meta_inval);
diff --git a/gfs/gfs_fsck/util.c b/gfs/gfs_fsck/util.c
index bb53d8c..6d2a9a2 100644
--- a/gfs/gfs_fsck/util.c
+++ b/gfs/gfs_fsck/util.c
@@ -107,11 +107,8 @@ int check_meta(osi_buf_t *bh, int type){
check_magic = gfs32_to_cpu(check_magic);
check_type = gfs32_to_cpu(check_type);
- if((check_magic != GFS_MAGIC) || (type && (check_type != type))){
- log_debug("For %"PRIu64" Expected %X:%X - got %X:%X\n",
BH_BLKNO(bh), GFS_MAGIC, type,
- check_magic, check_type);
- return -1;
- }
+ if((check_magic != GFS_MAGIC) || (type && (check_type != type)))
+ return -1;
return 0;
}