Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=d45a4fe2fa265…
Commit: d45a4fe2fa265a19a4130161caa4ce92367f9072
Parent: 1d7a0335016cae0246f455a707f78bfc5d1ff880
Author: John Ruemker <jruemker(a)redhat.com>
AuthorDate: Tue May 14 15:19:19 2013 -0500
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon May 20 23:46:49 2013 -0400
HA LVM should only remove missing PVs on stop when they belong to mirrors
This adds --mirrorsonly to the 3 'vgreduce --removemissing' calls in the
LVM agents.
You'll also notice that it adds another self_fence check after we fail to
remove tags. In my previous comment, I pointed out that in the case of
single-host by_lv, after we vgreduce we then can't deactivate the logical
volume again because it doesn't exist. This results in us executing
self_fence, which may have just been a happy accident. But when we avoid
making metadata changes by adding --mirrorsonly, the subsequent deactivation
is still successful, and thus we miss the self_fence logic. So, I added
another check so we still catch the failure and fence ourselves in this
situation.
Resolves: rhbz#962376
Signed-off-by: John Ruemker <jruemker(a)redhat.com>
Signed-off-by: Jonthan Brassow <jbrassow(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/lvm_by_lv.sh | 14 ++++++++++----
rgmanager/src/resources/lvm_by_vg.sh | 4 ++--
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh
index 4971173..7140076 100644
--- a/rgmanager/src/resources/lvm_by_lv.sh
+++ b/rgmanager/src/resources/lvm_by_lv.sh
@@ -243,13 +243,19 @@ lv_activate_and_tag()
# be removed from the VG via a separate call before
# the tag can be removed.
ocf_log err "Attempting volume group clean-up and retry"
- vgreduce --removemissing --force $OCF_RESKEY_vg_name
+ vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name
# Retry tag deletion
lvchange --deltag $tag $lv_path
if [ $? -ne 0 ]; then
- ocf_log err "Failed to delete tag from $lv_path"
- return $OCF_ERR_GENERIC
+ if [ "$self_fence" ]; then
+ ocf_log err "Failed to delete tag from $lv_path: REBOOTING"
+ sync
+ reboot -fn
+ else
+ ocf_log err "Failed to delete tag from $lv_path"
+ fi
+ return $OCF_ERR_GENERIC
fi
fi
@@ -322,7 +328,7 @@ lv_activate()
ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
- if vgreduce --removemissing --force --config \
+ if vgreduce --removemissing --mirrorsonly --force --config \
"activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
$OCF_RESKEY_vg_name; then
ocf_log notice "$OCF_RESKEY_vg_name now consistent"
diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh
index 0dd2aaa..819d0b8 100755
--- a/rgmanager/src/resources/lvm_by_vg.sh
+++ b/rgmanager/src/resources/lvm_by_vg.sh
@@ -202,7 +202,7 @@ function vg_start_clustered
ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
- if ! vgreduce --removemissing --force $OCF_RESKEY_vg_name; then
+ if ! vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name; then
ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
return $OCF_ERR_GENERIC
fi
@@ -398,7 +398,7 @@ function vg_stop_single
# Shut down the volume group
# Do we need to make this resilient?
- vgchange -an $OCF_RESKEY_vg_name
+ vgchange -aln $OCF_RESKEY_vg_name
# Make sure all the logical volumes are inactive
results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=d45a4fe2fa265…
Commit: d45a4fe2fa265a19a4130161caa4ce92367f9072
Parent: 1d7a0335016cae0246f455a707f78bfc5d1ff880
Author: John Ruemker <jruemker(a)redhat.com>
AuthorDate: Tue May 14 15:19:19 2013 -0500
Committer: Ryan McCabe <rmccabe(a)redhat.com>
CommitterDate: Mon May 20 23:46:49 2013 -0400
HA LVM should only remove missing PVs on stop when they belong to mirrors
This adds --mirrorsonly to the 3 'vgreduce --removemissing' calls in the
LVM agents.
You'll also notice that it adds another self_fence check after we fail to
remove tags. In my previous comment, I pointed out that in the case of
single-host by_lv, after we vgreduce we then can't deactivate the logical
volume again because it doesn't exist. This results in us executing
self_fence, which may have just been a happy accident. But when we avoid
making metadata changes by adding --mirrorsonly, the subsequent deactivation
is still successful, and thus we miss the self_fence logic. So, I added
another check so we still catch the failure and fence ourselves in this
situation.
Resolves: rhbz#962376
Signed-off-by: John Ruemker <jruemker(a)redhat.com>
Signed-off-by: Jonthan Brassow <jbrassow(a)redhat.com>
Signed-off-by: Ryan McCabe <rmccabe(a)redhat.com>
---
rgmanager/src/resources/lvm_by_lv.sh | 14 ++++++++++----
rgmanager/src/resources/lvm_by_vg.sh | 4 ++--
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh
index 4971173..7140076 100644
--- a/rgmanager/src/resources/lvm_by_lv.sh
+++ b/rgmanager/src/resources/lvm_by_lv.sh
@@ -243,13 +243,19 @@ lv_activate_and_tag()
# be removed from the VG via a separate call before
# the tag can be removed.
ocf_log err "Attempting volume group clean-up and retry"
- vgreduce --removemissing --force $OCF_RESKEY_vg_name
+ vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name
# Retry tag deletion
lvchange --deltag $tag $lv_path
if [ $? -ne 0 ]; then
- ocf_log err "Failed to delete tag from $lv_path"
- return $OCF_ERR_GENERIC
+ if [ "$self_fence" ]; then
+ ocf_log err "Failed to delete tag from $lv_path: REBOOTING"
+ sync
+ reboot -fn
+ else
+ ocf_log err "Failed to delete tag from $lv_path"
+ fi
+ return $OCF_ERR_GENERIC
fi
fi
@@ -322,7 +328,7 @@ lv_activate()
ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
- if vgreduce --removemissing --force --config \
+ if vgreduce --removemissing --mirrorsonly --force --config \
"activation { volume_list = \"$OCF_RESKEY_vg_name\" }" \
$OCF_RESKEY_vg_name; then
ocf_log notice "$OCF_RESKEY_vg_name now consistent"
diff --git a/rgmanager/src/resources/lvm_by_vg.sh b/rgmanager/src/resources/lvm_by_vg.sh
index 0dd2aaa..819d0b8 100755
--- a/rgmanager/src/resources/lvm_by_vg.sh
+++ b/rgmanager/src/resources/lvm_by_vg.sh
@@ -202,7 +202,7 @@ function vg_start_clustered
ocf_log err "Failed to activate volume group, $OCF_RESKEY_vg_name"
ocf_log notice "Attempting cleanup of $OCF_RESKEY_vg_name"
- if ! vgreduce --removemissing --force $OCF_RESKEY_vg_name; then
+ if ! vgreduce --removemissing --mirrorsonly --force $OCF_RESKEY_vg_name; then
ocf_log err "Failed to make $OCF_RESKEY_vg_name consistent"
return $OCF_ERR_GENERIC
fi
@@ -398,7 +398,7 @@ function vg_stop_single
# Shut down the volume group
# Do we need to make this resilient?
- vgchange -an $OCF_RESKEY_vg_name
+ vgchange -aln $OCF_RESKEY_vg_name
# Make sure all the logical volumes are inactive
results=(`lvs -o name,attr --noheadings $OCF_RESKEY_vg_name 2> /dev/null`)
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=e8d58780c4…
Commit: e8d58780c43e0befeacab299c6d099e196bc83b9
Parent: 58a213659fb8afd5d25fa25d7ec3ec0a8d5e21dd
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Fri Apr 19 09:25:51 2013 -0700
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Mon May 20 11:12:47 2013 -0500
fsck.gfs2: take hash table start boundaries into account
When checking the hash table in pass2, we can't just keep doubling
the length for each consecutive check because the number of pointer
copies (aka length) is also tied to the starting offset. If the
starting offset is invalid for the length, it might treat a chunk of
the hash table as bigger than it should, eventually overwriting good
entries. Along the same lines, while we're trying to determine the
length, it's not good enough to double the length and check if the
hash table entry matches. The reason is: there can be several values
overwritten with the same value, 0x00, that indicates places where
pass1 found an invalid leaf block pointer. To avoid that, we need to
check intermediate values as well, and stop if we find a gap.
---
gfs2/fsck/metawalk.c | 5 +++--
gfs2/fsck/pass2.c | 43 ++++++++++++++++++++++++++++++++++---------
2 files changed, 37 insertions(+), 11 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index ffc3555..44b5c66 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -473,11 +473,12 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
if ((char *)dent + de.de_rec_len >= bh_end){
log_debug( _("Last entry processed for %lld->%lld "
- "(0x%llx->0x%llx).\n"),
+ "(0x%llx->0x%llx), di_blocks=%llu.\n"),
(unsigned long long)ip->i_di.di_num.no_addr,
(unsigned long long)bh->b_blocknr,
(unsigned long long)ip->i_di.di_num.no_addr,
- (unsigned long long)bh->b_blocknr);
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)ip->i_di.di_blocks);
break;
}
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index a24edbe..3d0bb49 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -370,9 +370,10 @@ static int wrong_leaf(struct gfs2_inode *ip, struct gfs2_inum *entry,
gfs2_get_leaf_nr(ip, hash_index, &real_leaf);
if (real_leaf != planned_leaf) {
log_err(_("The planned leaf was split. The new leaf "
- "is: %llu (0x%llx)"),
+ "is: %llu (0x%llx). di_blocks=%llu\n"),
(unsigned long long)real_leaf,
- (unsigned long long)real_leaf);
+ (unsigned long long)real_leaf,
+ (unsigned long long)ip->i_di.di_blocks);
fsck_blockmap_set(ip, real_leaf, _("split leaf"),
gfs2_indir_blk);
}
@@ -1032,6 +1033,7 @@ static int basic_check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
log_err( _("Bad directory entry '%s' cleared.\n"), tmp_name);
return 1;
} else {
+ (*count)++;
return 0;
}
}
@@ -1150,11 +1152,13 @@ static int fix_hashtable(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize,
/* Look at the first dirent and check its hash value to see if it's
at the proper starting offset. */
hash_index = hash_table_index(dentry.de_hash, ip);
+ /* Need to use len here, not *proper_len because the leaf block may
+ be valid within the range, but starts too soon in the hash table. */
if (hash_index < lindex || hash_index > lindex + len) {
log_err(_("This leaf block has hash index %d, which is out of "
"bounds for where it appears in the hash table "
"(%d - %d)\n"),
- hash_index, lindex, lindex + len);
+ hash_index, lindex, lindex + *proper_len);
error = lost_leaf(ip, tbl, leafblk, len, lindex, lbh);
brelse(lbh);
return error;
@@ -1291,6 +1295,8 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
struct gfs2_buffer_head *lbh;
int factor;
uint32_t proper_start;
+ uint32_t next_proper_start;
+ int anomaly;
lindex = 0;
while (lindex < hsize) {
@@ -1299,10 +1305,23 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
len = 1;
factor = 0;
leafblk = be64_to_cpu(tbl[lindex]);
+ next_proper_start = lindex;
+ anomaly = 0;
while (lindex + (len << 1) - 1 < hsize) {
if (be64_to_cpu(tbl[lindex + (len << 1) - 1]) !=
leafblk)
break;
+ next_proper_start = (lindex & ~((len << 1) - 1));
+ if (lindex != next_proper_start)
+ anomaly = 1;
+ /* Check if there are other values written between
+ here and the next factor. */
+ for (i = len; !anomaly && i + lindex < hsize &&
+ i < (len << 1); i++)
+ if (be64_to_cpu(tbl[lindex + i]) != leafblk)
+ anomaly = 1;
+ if (anomaly)
+ break;
len <<= 1;
factor++;
}
@@ -1344,8 +1363,10 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
proper_start = (lindex & ~(proper_len - 1));
if (lindex != proper_start) {
log_debug(_("lindex 0x%llx is not a proper starting "
- "point for this leaf: 0x%llx\n"),
+ "point for leaf %llu (0x%llx): 0x%llx\n"),
(unsigned long long)lindex,
+ (unsigned long long)leafblk,
+ (unsigned long long)leafblk,
(unsigned long long)proper_start);
changes = fix_hashtable(ip, tbl, hsize, leafblk,
lindex, proper_start, len,
@@ -1368,9 +1389,11 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
depth, and adjust the hash table accordingly. */
if (len != proper_len) {
log_err(_("Length %d (0x%x) is not a proper length "
- "for this leaf. Valid boundary assumed to "
- "be %d (0x%x).\n"),
- len, len, proper_len, proper_len);
+ "for leaf %llu (0x%llx). Valid boundary "
+ "assumed to be %d (0x%x).\n"), len, len,
+ (unsigned long long)leafblk,
+ (unsigned long long)leafblk,
+ proper_len, proper_len);
lbh = bread(ip->i_sbd, leafblk);
gfs2_leaf_in(&leaf, lbh);
if (gfs2_check_meta(lbh, GFS2_METATYPE_LF) ||
@@ -1419,8 +1442,10 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
proper_len = 1 << (ip->i_di.di_depth - leaf.lf_depth);
if (proper_len != len) {
log_debug(_("Length 0x%x is not proper for "
- "this leaf: 0x%x"),
- len, proper_len);
+ "leaf %llu (0x%llx): 0x%x"),
+ len, (unsigned long long)leafblk,
+ (unsigned long long)leafblk,
+ proper_len);
changes = fix_hashtable(ip, tbl, hsize,
leafblk, lindex,
lindex, len,
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=58a213659f…
Commit: 58a213659fb8afd5d25fa25d7ec3ec0a8d5e21dd
Parent: c2a39034d9f2888dc0a9431cea86998a929c30ba
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Wed Apr 17 14:09:30 2013 -0700
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Mon May 20 11:12:47 2013 -0500
fsck.gfs2: Don't allocate leaf blocks in pass1
Before this patch, if leaf blocks were found to be corrupt, pass1
tried to fix them by allocating new leaf blocks in place of the bad
ones. That's a bad idea, because pass1 populates the blockmap and
sets the bitmap accordingly. In other words, it's dynamically changing.
Say, for example, that you're checking a directory a dinode 0x1234, and
it has a corrupt hash table, and needs new leaf blocks inserted.
Now suppose you have a second directory that occurs later in the bitmap,
say at block 0x2345, and it references leaf block 0x2346, but for some
reason that block (0x2346) is improperly set to "free" in the bitmap.
If pass1 goes out looking for a free block in order to allocate a new
leaf for 0x1234, it will naturally find block 0x2346, because it's
marked free. It writes a new leaf at that block and adds a new
reference in the hash table of 0x1234. Later, when pass1 processes
directory 0x2345, it discovers the reference to 0x2346. Not only has
it wiped out the perfectly good leaf block, it has also created a
duplicate block reference that it needs to sort out in pass1b, which
will likely keep the replaced reference and throw the good one we
had. Thus, we introduced corruption into the file system when we
should have kept the only good reference to 0x2346 and fixed the
bitmap.
The solution provided by this patch is to simply zero out the bad
hash table entries when pass1 comes across them. Later, when pass2
discovers the zero leaf blocks, it can safely allocate new blocks
(since pass1 synced the bitmap according to the blockmap) for the new
leaf blocks and replace the zeros with valid block references.
---
gfs2/fsck/metawalk.c | 31 ++++++++++++++++++++++++++++++-
gfs2/fsck/metawalk.h | 2 +-
gfs2/fsck/pass1.c | 9 ++-------
gfs2/fsck/pass2.c | 2 +-
4 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index 161c183..ffc3555 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -1955,7 +1955,7 @@ int write_new_leaf(struct gfs2_inode *dip, int start_lindex, int num_copies,
* leaf a bit, but it's better than deleting the whole directory,
* which is what used to happen before. */
int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex,
- int ref_count, const char *msg)
+ int ref_count, const char *msg, int allow_alloc)
{
int new_leaf_blks = 0, error, refs;
uint64_t bn = 0;
@@ -1970,6 +1970,35 @@ int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex,
log_err( _("Bad leaf left in place.\n"));
goto out;
}
+ if (!allow_alloc) {
+ uint64_t *cpyptr;
+ char *padbuf;
+ int pad_size, i;
+
+ padbuf = malloc(ref_count * sizeof(uint64_t));
+ cpyptr = (uint64_t *)padbuf;
+ for (i = 0; i < ref_count; i++) {
+ *cpyptr = 0;
+ cpyptr++;
+ }
+ pad_size = ref_count * sizeof(uint64_t);
+ log_err(_("Writing zeros to the hash table of directory %lld "
+ "(0x%llx) at index: 0x%x for 0x%x pointers.\n"),
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ lindex, ref_count);
+ if (ip->i_sbd->gfs1)
+ gfs1_writei(ip, padbuf, lindex * sizeof(uint64_t),
+ pad_size);
+ else
+ gfs2_writei(ip, padbuf, lindex * sizeof(uint64_t),
+ pad_size);
+ free(padbuf);
+ log_err( _("Directory Inode %llu (0x%llx) patched.\n"),
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr);
+ goto out;
+ }
/* We can only write leafs in quantities that are factors of
two, since leaves are doubled, not added sequentially.
So if we have a hole that's not a factor of 2, we have to
diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
index aacb962..a5a51c2 100644
--- a/gfs2/fsck/metawalk.h
+++ b/gfs2/fsck/metawalk.h
@@ -61,7 +61,7 @@ extern int write_new_leaf(struct gfs2_inode *dip, int start_lindex,
int num_copies, const char *before_or_after,
uint64_t *bn);
extern int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex,
- int ref_count, const char *msg);
+ int ref_count, const char *msg, int allow_alloc);
#define is_duplicate(dblock) ((dupfind(dblock)) ? 1 : 0)
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 2c1c046..df778ef 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -84,13 +84,8 @@ static int pass1_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no,
int lindex, int ref_count, const char *msg,
void *private)
{
- struct block_count *bc = (struct block_count *)private;
- int new_leaf_blks;
-
- new_leaf_blks = repair_leaf(ip, leaf_no, lindex, ref_count, msg);
- bc->indir_count += new_leaf_blks;
-
- return new_leaf_blks;
+ repair_leaf(ip, leaf_no, lindex, ref_count, msg, 0);
+ return 0;
}
struct metawalk_fxns pass1_fxns = {
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index 5767c4d..a24edbe 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -1040,7 +1040,7 @@ static int pass2_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no,
int lindex, int ref_count, const char *msg,
void *private)
{
- return repair_leaf(ip, leaf_no, lindex, ref_count, msg);
+ return repair_leaf(ip, leaf_no, lindex, ref_count, msg, 1);
}
/* The purpose of leafck_fxns is to provide a means for function fix_hashtable
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=fb2ef82d8d…
Commit: fb2ef82d8dd9b4c5304d377b9d2fa1ad3da1a82c
Parent: a3c643a9c98dd68138ff6b623fd86923a16fc626
Author: Bob Peterson <rpeterso(a)redhat.com>
AuthorDate: Thu Apr 11 07:22:33 2013 -0700
Committer: Bob Peterson <rpeterso(a)redhat.com>
CommitterDate: Mon May 20 11:12:47 2013 -0500
fsck.gfs2: double-check transitions from dinode to data
If a corrupt dinode references a bunch of blocks as data blocks,
and those blocks occur later in the bitmap (as is usually the case)
but they're really dinodes, we have a problem. Before it finds the
corruption, it can change the bitmap markings from 'dinode' to 'data'
blocks. Later, when it determines the dinode is corrupt. It tries
to "undo" all those data blocks, but since pass1 hasn't processed
them yet, it marks them as 'free' in the bitmap, and we've lost the
fact that they're dinodes. The result is that the files/dinodes
being improperly referenced are deleted by mistake.
This patch adds a check for bitmap transitions in pass1 from 'dinode'
to 'data', where the block hasn't been checked yet. We don't care about
transitions from dinode to free because that's a normal delete of a
dinode. We also don't care about transitions between dinode to
metadata, because all those checks validate that the metadata type is
the correct type of metadata, so we know we're making the right
decision. So the only issue are data blocks referencing dinodes.
What this patch does is: when the bitmap is making a transition from
'dinode' to 'data' in pass1, it basically puts up a red flag.
The block is read in and checked to see if it really looks like a
dinode. We have to be careful here, because customer data is allowed
to look like a dinode. If the block really seems to be a dinode, we
DO NOT want to treat it as a data block and assume the duplicate
reference handler in pass1b will handle it, because the dinode's
metadata blocks will not have been checked in pass1.
Instead, we want to flag it as corruption in the referencing file
dinode, not change the bitmap or blockmap, and allow pass1 to treat
it properly as a dinode when it gets there. The corrupt dinode
referencing the dinode as 'data' should be deleted and the work done
thusfar should be backed out by the pass1 'undo' functions.
---
gfs2/fsck/metawalk.c | 21 +++++++++++++---
gfs2/fsck/metawalk.h | 14 +++++++----
gfs2/fsck/pass1.c | 65 +++++++++++++++++++++++++++++++++++++++++++++----
gfs2/fsck/pass1b.c | 2 +-
gfs2/fsck/pass2.c | 2 +-
gfs2/fsck/pass3.c | 4 +-
6 files changed, 89 insertions(+), 19 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index 22b16ee..6e9e593 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -27,7 +27,7 @@
is used to set the latter. The two must be kept in sync, otherwise
you'll get bitmap mismatches. This function checks the status of the
bitmap whenever the blockmap changes, and fixes it accordingly. */
-int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
+int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk, int error_on_dinode,
enum gfs2_mark_block new_blockmap_state)
{
int old_bitmap_state, new_bitmap_state;
@@ -49,6 +49,16 @@ int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
/* gfs1 descriptions: */
{"free", "data", "free meta", "metadata", "reserved"}};
+ if (error_on_dinode && old_bitmap_state == GFS2_BLKST_DINODE &&
+ new_bitmap_state != GFS2_BLKST_FREE) {
+ log_debug(_("Reference as '%s' to block %llu (0x%llx) "
+ "which was marked as dinode. Needs "
+ "further investigation.\n"),
+ allocdesc[sdp->gfs1][new_bitmap_state],
+ (unsigned long long)blk,
+ (unsigned long long)blk);
+ return 1;
+ }
/* Keep these messages as short as possible, or the output
gets to be huge and unmanageable. */
log_err( _("Block %llu (0x%llx) was '%s', should be %s.\n"),
@@ -106,6 +116,7 @@ int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
*/
int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock,
const char *btype, enum gfs2_mark_block mark,
+ int error_on_dinode,
const char *caller, int fline)
{
int error;
@@ -164,9 +175,11 @@ int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock,
/* First, check the rgrp bitmap against what we think it should be.
If that fails, it's an invalid block--part of an rgrp. */
- error = check_n_fix_bitmap(ip->i_sbd, bblock, mark);
+ error = check_n_fix_bitmap(ip->i_sbd, bblock, error_on_dinode, mark);
if (error) {
- log_err( _("This block is not represented in the bitmap.\n"));
+ if (error < 0)
+ log_err( _("This block is not represented in the "
+ "bitmap.\n"));
return error;
}
@@ -517,7 +530,7 @@ int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
if (pass->check_leaf) {
error = pass->check_leaf(ip, *leaf_no, pass->private);
- if (error) {
+ if (error == -EEXIST) {
log_info(_("Previous reference to leaf %lld (0x%llx) "
"has already checked it; skipping.\n"),
(unsigned long long)*leaf_no,
diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
index 56f57d9..aacb962 100644
--- a/gfs2/fsck/metawalk.h
+++ b/gfs2/fsck/metawalk.h
@@ -45,10 +45,12 @@ extern int delete_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
void *private);
extern int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock,
- const char *btype, enum gfs2_mark_block mark,
- const char *caller, int line);
+ const char *btype, enum gfs2_mark_block mark,
+ int error_on_dinode,
+ const char *caller, int line);
extern int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
- enum gfs2_mark_block new_blockmap_state);
+ int error_on_dinode,
+ enum gfs2_mark_block new_blockmap_state);
extern void reprocess_inode(struct gfs2_inode *ip, const char *desc);
extern struct duptree *dupfind(uint64_t block);
extern struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp,
@@ -63,8 +65,10 @@ extern int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex,
#define is_duplicate(dblock) ((dupfind(dblock)) ? 1 : 0)
-#define fsck_blockmap_set(ip, b, bt, m) _fsck_blockmap_set(ip, b, bt, m, \
- __FUNCTION__, __LINE__)
+#define fsck_blockmap_set(ip, b, bt, m) \
+ _fsck_blockmap_set(ip, b, bt, m, 0, __FUNCTION__, __LINE__)
+#define fsck_blkmap_set_noino(ip, b, bt, m) \
+ _fsck_blockmap_set(ip, b, bt, m, 1, __FUNCTION__, __LINE__)
enum meta_check_rc {
meta_error = -1,
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index ad6690b..ee828d8 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -150,7 +150,7 @@ static int resuscitate_metalist(struct gfs2_inode *ip, uint64_t block,
if (fsck_system_inode(ip->i_sbd, block))
fsck_blockmap_set(ip, block, _("system file"), gfs2_indir_blk);
else
- check_n_fix_bitmap(ip->i_sbd, block, gfs2_indir_blk);
+ check_n_fix_bitmap(ip->i_sbd, block, 0, gfs2_indir_blk);
bc->indir_count++;
return meta_is_good;
}
@@ -204,7 +204,7 @@ static int resuscitate_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
if (fsck_system_inode(sdp, block))
fsck_blockmap_set(ip, block, _("system file"), dinode_type);
else
- check_n_fix_bitmap(sdp, block, dinode_type);
+ check_n_fix_bitmap(sdp, block, 0, dinode_type);
/* Return the number of leaf entries so metawalk doesn't flag this
leaf as having none. */
*count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
@@ -339,6 +339,8 @@ static int undo_reference(struct gfs2_inode *ip, uint64_t block, int meta,
struct block_count *bc = (struct block_count *)private;
struct duptree *dt;
struct inode_with_dups *id;
+ int old_bitmap_state = 0;
+ struct rgrp_tree *rgd;
if (!valid_block(ip->i_sbd, block)) { /* blk outside of FS */
fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
@@ -367,6 +369,12 @@ static int undo_reference(struct gfs2_inode *ip, uint64_t block, int meta,
return 1;
}
}
+ if (!meta) {
+ rgd = gfs2_blk2rgrpd(ip->i_sbd, block);
+ old_bitmap_state = lgfs2_get_bitmap(ip->i_sbd, block, rgd);
+ if (old_bitmap_state == GFS2_BLKST_DINODE)
+ return -1;
+ }
fsck_blockmap_set(ip, block,
meta ? _("bad indirect") : _("referenced data"),
gfs2_block_free);
@@ -385,6 +393,51 @@ static int undo_check_data(struct gfs2_inode *ip, uint64_t block,
return undo_reference(ip, block, 0, private);
}
+/* blockmap_set_as_data - set block as 'data' in the blockmap, if not dinode
+ *
+ * This function tries to set a block that's referenced as data as 'data'
+ * in the fsck blockmap. But if that block is marked as 'dinode' in the
+ * rgrp bitmap, it does additional checks to see if it looks like a dinode.
+ * Note that previous checks were done for duplicate references, so this
+ * is checking for dinodes that we haven't processed yet.
+ */
+static int blockmap_set_as_data(struct gfs2_inode *ip, uint64_t block)
+{
+ int error;
+ struct gfs2_buffer_head *bh;
+ struct gfs2_dinode *di;
+
+ error = fsck_blkmap_set_noino(ip, block, _("data"), gfs2_block_used);
+ if (!error)
+ return 0;
+
+ error = 0;
+ /* The bitmap says it's a dinode, but a block reference begs to differ.
+ So which is it? */
+ bh = bread(ip->i_sbd, block);
+ if (gfs2_check_meta(bh, GFS2_METATYPE_DI) != 0)
+ goto out;
+
+ /* The meta header agrees it's a dinode. But it might be data in
+ disguise, so do some extra checks. */
+ di = (struct gfs2_dinode *)bh->b_data;
+ if (be64_to_cpu(di->di_num.no_addr) != block)
+ goto out;
+
+ log_err(_("Inode %lld (0x%llx) has a reference to block %lld (0x%llx) "
+ "as a data block, but it appears to be a dinode we "
+ "haven't checked yet.\n"),
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)ip->i_di.di_num.no_addr,
+ (unsigned long long)block, (unsigned long long)block);
+ error = -1;
+out:
+ if (!error)
+ fsck_blockmap_set(ip, block, _("data"), gfs2_block_used);
+ brelse(bh);
+ return error;
+}
+
static int check_data(struct gfs2_inode *ip, uint64_t metablock,
uint64_t block, void *private)
{
@@ -469,7 +522,7 @@ static int check_data(struct gfs2_inode *ip, uint64_t metablock,
(unsigned long long)block, (unsigned long long)block);
fsck_blockmap_set(ip, block, _("jdata"), gfs2_jdata);
} else
- fsck_blockmap_set(ip, block, _("data"), gfs2_block_used);
+ return blockmap_set_as_data(ip, block);
return 0;
}
@@ -1199,7 +1252,7 @@ static int check_system_inode(struct gfs2_sbd *sdp,
(unsigned long long)iblock,
(unsigned long long)iblock);
gfs2_blockmap_set(bl, iblock, gfs2_block_free);
- check_n_fix_bitmap(sdp, iblock, gfs2_block_free);
+ check_n_fix_bitmap(sdp, iblock, 0, gfs2_block_free);
inode_put(sysinode);
}
}
@@ -1486,7 +1539,7 @@ static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uin
"%llu (0x%llx)\n"),
(unsigned long long)block,
(unsigned long long)block);
- check_n_fix_bitmap(sdp, block, gfs2_block_free);
+ check_n_fix_bitmap(sdp, block, 0, gfs2_block_free);
} else if (handle_di(sdp, bh) < 0) {
stack;
brelse(bh);
@@ -1596,7 +1649,7 @@ int pass1(struct gfs2_sbd *sdp)
}
/* rgrps and bitmaps don't have bits to represent
their blocks, so don't do this:
- check_n_fix_bitmap(sdp, rgd->ri.ri_addr + i,
+ check_n_fix_bitmap(sdp, rgd->ri.ri_addr + i, 0,
gfs2_meta_rgrp);*/
}
diff --git a/gfs2/fsck/pass1b.c b/gfs2/fsck/pass1b.c
index 9c76eda..9a23197 100644
--- a/gfs2/fsck/pass1b.c
+++ b/gfs2/fsck/pass1b.c
@@ -501,7 +501,7 @@ static int handle_dup_blk(struct gfs2_sbd *sdp, struct duptree *dt)
dup_delete(dh.dt);
/* Now fix the block type of the block in question. */
gfs2_blockmap_set(bl, dup_blk, gfs2_block_free);
- check_n_fix_bitmap(sdp, dup_blk, gfs2_block_free);
+ check_n_fix_bitmap(sdp, dup_blk, 0, gfs2_block_free);
}
}
return 0;
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index dc99869..5767c4d 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -1713,7 +1713,7 @@ int pass2(struct gfs2_sbd *sdp)
/* Can't use fsck_blockmap_set here because we don't
have an inode in memory. */
gfs2_blockmap_set(bl, dirblk, gfs2_inode_invalid);
- check_n_fix_bitmap(sdp, dirblk, gfs2_inode_invalid);
+ check_n_fix_bitmap(sdp, dirblk, 0, gfs2_inode_invalid);
}
ip = fsck_load_inode(sdp, dirblk);
if (!ds.dotdir) {
diff --git a/gfs2/fsck/pass3.c b/gfs2/fsck/pass3.c
index 53052b6..4894d8c 100644
--- a/gfs2/fsck/pass3.c
+++ b/gfs2/fsck/pass3.c
@@ -275,7 +275,7 @@ int pass3(struct gfs2_sbd *sdp)
gfs2_blockmap_set(bl, di->dinode.no_addr,
gfs2_block_free);
check_n_fix_bitmap(sdp, di->dinode.no_addr,
- gfs2_block_free);
+ 0, gfs2_block_free);
break;
} else
log_err( _("Unlinked directory with bad block remains\n"));
@@ -299,7 +299,7 @@ int pass3(struct gfs2_sbd *sdp)
because we don't have ip */
gfs2_blockmap_set(bl, di->dinode.no_addr,
gfs2_block_free);
- check_n_fix_bitmap(sdp, di->dinode.no_addr,
+ check_n_fix_bitmap(sdp, di->dinode.no_addr, 0,
gfs2_block_free);
log_err( _("The block was cleared\n"));
break;