master - cleanup: use 'dm_get_status_raid'
by Zdenek Kabelac
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=b7c9ec8a24a0271e55e...
Commit: b7c9ec8a24a0271e55ea5aa0c9f6525011d0fa0c
Parent: 59d646167f8f47fbef3231469675f52d90432205
Author: Zdenek Kabelac <zkabelac(a)redhat.com>
AuthorDate: Fri Jun 16 10:48:38 2017 +0200
Committer: Zdenek Kabelac <zkabelac(a)redhat.com>
CommitterDate: Fri Jun 16 17:04:01 2017 +0200
cleanup: use 'dm_get_status_raid'
Use single 'dm' call to parse raid status.
(Avoiding multiple parsers - even when we know it's slightly
less efficient).
---
lib/raid/raid.c | 37 +++++++++++--------------------------
1 files changed, 11 insertions(+), 26 deletions(-)
diff --git a/lib/raid/raid.c b/lib/raid/raid.c
index 969007c..c5cfb0f 100644
--- a/lib/raid/raid.c
+++ b/lib/raid/raid.c
@@ -358,36 +358,21 @@ static int _raid_target_percent(void **target_state,
uint64_t *total_numerator,
uint64_t *total_denominator)
{
- int i;
- uint64_t numerator, denominator;
- char *pos = params;
- /*
- * Status line:
- * <raid_type> <#devs> <status_chars> <synced>/<total>
- * Example:
- * raid1 2 AA 1024000/1024000
- */
- for (i = 0; i < 3; i++) {
- pos = strstr(pos, " ");
- if (pos)
- pos++;
- else
- break;
- }
- if (!pos || (sscanf(pos, FMTu64 "/" FMTu64 "%n", &numerator, &denominator, &i) != 2) ||
- !denominator) {
- log_error("Failed to parse %s status fraction: %s",
- (seg) ? seg->segtype->name : "segment", params);
- return 0;
- }
+ struct dm_status_raid *sr;
+
+ if (!dm_get_status_raid(mem, params, &sr))
+ return_0;
- *total_numerator += numerator;
- *total_denominator += denominator;
+ *total_numerator += sr->insync_regions;
+ *total_denominator += sr->total_regions;
if (seg)
- seg->extents_copied = (uint64_t) seg->area_len * dm_make_percent(numerator, denominator) / DM_PERCENT_100;
+ seg->extents_copied = (uint64_t) seg->area_len
+ * dm_make_percent(sr->insync_regions , sr->total_regions) / DM_PERCENT_100;
+
+ *percent = dm_make_percent(sr->insync_regions, sr->total_regions);
- *percent = dm_make_percent(numerator, denominator);
+ dm_pool_free(mem, sr);
return 1;
}
6 years, 10 months
master - raid: report percent with segtype info
by Zdenek Kabelac
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=59d646167f8f47fbef3...
Commit: 59d646167f8f47fbef3231469675f52d90432205
Parent: 529dcaf6a3c4fea4a15baf13cf057d2333860c05
Author: Zdenek Kabelac <zkabelac(a)redhat.com>
AuthorDate: Fri Jun 16 13:20:25 2017 +0200
Committer: Zdenek Kabelac <zkabelac(a)redhat.com>
CommitterDate: Fri Jun 16 17:04:01 2017 +0200
raid: report percent with segtype info
Enhance reporting code, so it does not need to do 'extra' ioctl to
get 'status' of normal raid and provide percentage directly.
When we have 'merging' snapshot into raid origin, we still need to get
this secondary number with extra status call - however, since 'raid'
is always a single segment LV - we may skip 'copy_percent' call as
we directly know the percent and also with better precision.
NOTE: for mirror we still base the reported number on the percentage of
transferred extents, which might get quite imprecise if a big extent
size is used while the volume itself is small, as the reporting jump
steps are much bigger than the precision the reported number suggests.
2nd.NOTE: raid lvs line report already requires quite a few extra status
calls for the same device - but the fix will need a slight code improvement.
---
WHATS_NEW | 1 +
lib/metadata/lv.c | 9 +++++++++
lib/report/report.c | 10 ++++++----
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/WHATS_NEW b/WHATS_NEW
index b2796f6..305e185 100644
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.172 -
===============================
+ Improve raid status reporting with lvs.
No longer necessary to '--force' a repair for RAID1
Linear to RAID1 upconverts now use "recover" sync action, not "resync".
Improve lvcreate --cachepool arg validation.
diff --git a/lib/metadata/lv.c b/lib/metadata/lv.c
index b24c4aa..555df1a 100644
--- a/lib/metadata/lv.c
+++ b/lib/metadata/lv.c
@@ -395,6 +395,15 @@ dm_percent_t lvseg_percent_with_info_and_seg_status(const struct lv_with_info_an
}
}
break;
+ case SEG_STATUS_RAID:
+ switch (type) {
+ case PERCENT_GET_DIRTY:
+ p = dm_make_percent(s->raid->insync_regions, s->raid->total_regions);
+ break;
+ default:
+ p = DM_PERCENT_INVALID;
+ }
+ break;
case SEG_STATUS_SNAPSHOT:
if (s->snapshot->merge_failed)
p = DM_PERCENT_INVALID;
diff --git a/lib/report/report.c b/lib/report/report.c
index d9880b2..f61776e 100644
--- a/lib/report/report.c
+++ b/lib/report/report.c
@@ -3079,11 +3079,13 @@ static int _copypercent_disp(struct dm_report *rh,
dm_percent_t percent = DM_PERCENT_INVALID;
/* TODO: just cache passes through lvseg_percent... */
- if (lv_is_cache(lv) || lv_is_used_cache_pool(lv))
+ if (lv_is_cache(lv) || lv_is_used_cache_pool(lv) ||
+ (!lv_is_merging_origin(lv) && lv_is_raid(lv) && !seg_is_any_raid0(first_seg(lv))))
percent = lvseg_percent_with_info_and_seg_status(lvdm, PERCENT_GET_DIRTY);
- else if (((lv_is_raid(lv) && !seg_is_any_raid0(first_seg(lv)) &&
- lv_raid_percent(lv, &percent)) ||
- (lv_is_mirror(lv) &&
+ else if (lv_is_raid(lv) && !seg_is_any_raid0(first_seg(lv)))
+ /* old way for percentage when merging snapshot into raid origin */
+ (void) lv_raid_percent(lv, &percent);
+ else if (((lv_is_mirror(lv) &&
lv_mirror_percent(lv->vg->cmd, lv, 0, &percent, NULL))) &&
(percent != DM_PERCENT_INVALID))
percent = copy_percent(lv);
6 years, 10 months
master - libdm: workarounds reported raid status info
by Zdenek Kabelac
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=529dcaf6a3c4fea4a15...
Commit: 529dcaf6a3c4fea4a15baf13cf057d2333860c05
Parent: 40e0dcf70d5a719671916aa88adf9a8f18e096c6
Author: Zdenek Kabelac <zkabelac(a)redhat.com>
AuthorDate: Fri Jun 16 13:20:47 2017 +0200
Committer: Zdenek Kabelac <zkabelac(a)redhat.com>
CommitterDate: Fri Jun 16 17:04:00 2017 +0200
libdm: workarounds reported raid status info
Currently existing kernels sometimes report status in a weird form.
Instead of showing what is the exact progress, we need to estimate
this in-sync state from several surrounding states.
Main reason here is to never report 100% sync state for a raid device
which will be undergoing i.e. recovery.
---
WHATS_NEW_DM | 1 +
libdm/libdm-targets.c | 18 ++++++++++++++++++
2 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/WHATS_NEW_DM b/WHATS_NEW_DM
index 5718ab7..581cd42 100644
--- a/WHATS_NEW_DM
+++ b/WHATS_NEW_DM
@@ -1,5 +1,6 @@
Version 1.02.141 -
===============================
+ dm_get_status_raid() handle better some incosistent md statuses.
Accept truncated files in calls to dm_stats_update_regions_from_fd().
Restore Warning by 5% increment when thin-pool is over 80% (1.02.138).
diff --git a/libdm/libdm-targets.c b/libdm/libdm-targets.c
index 1709c2b..6577f07 100644
--- a/libdm/libdm-targets.c
+++ b/libdm/libdm-targets.c
@@ -99,6 +99,7 @@ int dm_get_status_raid(struct dm_pool *mem, const char *params,
unsigned num_fields;
const char *p, *pp, *msg_fields = "";
struct dm_status_raid *s = NULL;
+ unsigned a = 0;
if ((num_fields = _count_fields(params)) < 4)
goto_bad;
@@ -168,6 +169,23 @@ int dm_get_status_raid(struct dm_pool *mem, const char *params,
out:
*status = s;
+ if (s->insync_regions == s->total_regions) {
+ /* FIXME: kernel gives misleading info here
+ * Trying to recognize a true state */
+ while (i-- > 0)
+ if (s->dev_health[i] == 'a')
+ a++; /* Count number of 'a' */
+
+ if (a && a < s->dev_count) {
+ /* SOME legs are in 'a' */
+ if (!strcasecmp(s->sync_action, "recover")
+ || !strcasecmp(s->sync_action, "idle"))
+ /* Kernel may possibly start some action
+ * in near-by future, do not report 100% */
+ s->insync_regions--;
+ }
+ }
+
return 1;
bad:
6 years, 10 months
master - raid: adjust reshape feature flag check
by Heinz Mauelshagen
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=40e0dcf70d5a7196719...
Commit: 40e0dcf70d5a719671916aa88adf9a8f18e096c6
Parent: ddf2a1d6564800f6d7f87e91cb8a7dfaa0edac1f
Author: Heinz Mauelshagen <heinzm(a)redhat.com>
AuthorDate: Fri Jun 16 15:58:47 2017 +0200
Committer: Heinz Mauelshagen <heinzm(a)redhat.com>
CommitterDate: Fri Jun 16 15:58:47 2017 +0200
raid: adjust reshape feature flag check
Relative to last commit ddf2a1d6564800f6d7f87e91cb8a7dfaa0edac1f:
adjust the dm-raid target version to 1.12.0 which shows
mandatory kernel MD deadlock fixes related to reshaping
are present in the kernel.
Related: rhbz1443999
---
lib/raid/raid.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/lib/raid/raid.c b/lib/raid/raid.c
index 8a53d7e..969007c 100644
--- a/lib/raid/raid.c
+++ b/lib/raid/raid.c
@@ -475,7 +475,7 @@ static int _raid_target_present(struct cmd_context *cmd,
{ 1, 7, 0, RAID_FEATURE_RAID0, SEG_TYPE_NAME_RAID0 },
{ 1, 9, 0, RAID_FEATURE_SHRINK, "shrinking" },
{ 1, 9, 0, RAID_FEATURE_NEW_DEVICES_ACCEPT_REBUILD, "rebuild+emptymeta" },
- { 1, 10, 1, RAID_FEATURE_RESHAPE, "reshaping" },
+ { 1, 12, 0, RAID_FEATURE_RESHAPE, "reshaping" },
};
static int _raid_checked = 0;
6 years, 10 months
master - Revert "lvconvert: reject changing number of stripes on single core
by Heinz Mauelshagen
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=ddf2a1d6564800f6d7f...
Commit: ddf2a1d6564800f6d7f87e91cb8a7dfaa0edac1f
Parent: 3592243afb4bd1b54926b7d8bfef3a0213ca4908
Author: Heinz Mauelshagen <heinzm(a)redhat.com>
AuthorDate: Fri Jun 16 15:43:23 2017 +0200
Committer: Heinz Mauelshagen <heinzm(a)redhat.com>
CommitterDate: Fri Jun 16 15:43:23 2017 +0200
Revert "lvconvert: reject changing number of stripes on single core
This reverts commit 3719f4bc5441cb5f29ad4beb91ccaa6b234ea8e1
to allow for single core testing on kernels with deadlock
fixes relative to rhbz1443999."
---
lib/metadata/raid_manip.c | 12 ------------
1 files changed, 0 insertions(+), 12 deletions(-)
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index ade27e6..214aa19 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -2346,12 +2346,6 @@ static int _raid_reshape(struct logical_volume *lv,
/* Handle disk addition reshaping */
if (old_image_count < new_image_count) {
- /* FIXME: remove once MD kernel rhbz1443999 got fixed. */
- if (sysconf(_SC_NPROCESSORS_ONLN) < 2) {
- log_error("Can't add stripes to LV %s on single core.", display_lvname(lv));
- return 0;
- }
-
if (!_raid_reshape_add_images(lv, new_segtype, yes,
old_image_count, new_image_count,
new_stripes, new_stripe_size, allocate_pvs))
@@ -2359,12 +2353,6 @@ static int _raid_reshape(struct logical_volume *lv,
/* Handle disk removal reshaping */
} else if (old_image_count > new_image_count) {
- /* FIXME: remove once MD kernel rhbz1443999 got fixed. */
- if (sysconf(_SC_NPROCESSORS_ONLN) < 2) {
- log_error("Can't remove stripes from LV %s on single core.", display_lvname(lv));
- return 0;
- }
-
if (!_raid_reshape_remove_images(lv, new_segtype, yes, force,
old_image_count, new_image_count,
new_stripes, new_stripe_size,
6 years, 10 months
master - test: New test file for validating kernel status during sync ops
by Jonathan Brassow
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=3592243afb4bd1b5492...
Commit: 3592243afb4bd1b54926b7d8bfef3a0213ca4908
Parent: 6c4b2a6aa16cdb4aff2bd80909dcf43032348a3a
Author: Jonathan Brassow <jbrassow(a)redhat.com>
AuthorDate: Thu Jun 15 11:06:08 2017 -0500
Committer: Jonathan Brassow <jbrassow(a)redhat.com>
CommitterDate: Thu Jun 15 11:06:08 2017 -0500
test: New test file for validating kernel status during sync ops
First test in this file checks whether 'aa' is ever spotted during
a "recover" operation (it should not be). More tests should follow
in this file to look for oddities in status output - especially as
it relates to the sync_ratio, dev_health, and sync_action fields.
---
test/shell/lvconvert-raid-status-validation.sh | 34 ++++++++++++++++++++++++
1 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/test/shell/lvconvert-raid-status-validation.sh b/test/shell/lvconvert-raid-status-validation.sh
new file mode 100644
index 0000000..0da0b7a
--- /dev/null
+++ b/test/shell/lvconvert-raid-status-validation.sh
@@ -0,0 +1,34 @@
+#######################################################################
+# This series of tests is meant to validate the correctness of
+# 'dmsetup status' for RAID LVs - especially during various sync action
+# transitions, like: recover, resync, check, repair, idle, reshape, etc
+#######################################################################
+SKIP_WITH_LVMLOCKD=1
+SKIP_WITH_LVMPOLLD=1
+
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
+. lib/inittest
+
+# check for version 1.9.0
+# - it is the point at which linear->raid1 uses "recover"
+aux have_raid 1 9 0 || skip
+
+aux prepare_pvs 9
+vgcreate -s 2m $vg $(cat DEVICES)
+
+###########################################
+# Upconverted RAID1 should never have all 'a's in status output
+###########################################
+aux delay_dev $dev2 0 100
+lvcreate -aey -l 2 -n $lv1 $vg $dev1
+lvconvert --type raid1 -y -m 1 $vg/$lv1 $dev2
+while ! check in_sync $vg $lv1; do
+ a=( $(dmsetup status $vg-$lv1) ) || die "Unable to get status of $vg/$lv1"
+ [ ${a[5]} != "aa" ]
+ sleep .1
+done
+aux enable_dev $dev2
+lvremove -ff $vg
+
+vgremove -ff $vg
6 years, 10 months
master - clean-up: Very picky update to comment - hopefully making it clearer
by Jonathan Brassow
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=6c4b2a6aa16cdb4aff2...
Commit: 6c4b2a6aa16cdb4aff2bd80909dcf43032348a3a
Parent: 1f57a5263e3ef2dd95e91ea39a40e45993d31028
Author: Jonathan Brassow <jbrassow(a)redhat.com>
AuthorDate: Wed Jun 14 15:22:04 2017 -0500
Committer: Jonathan Brassow <jbrassow(a)redhat.com>
CommitterDate: Wed Jun 14 15:22:04 2017 -0500
clean-up: Very picky update to comment - hopefully making it clearer
---
lib/metadata/raid_manip.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index dca5ba3..ade27e6 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -309,7 +309,7 @@ static int _deactivate_and_remove_lvs(struct volume_group *vg, struct dm_list *r
"LVs must be set visible before removing.");
return 0;
}
- /* Got to get any cluster lock an SubLVs to be removed. */
+ /* Must get a cluster lock on SubLVs that will be removed. */
if (!activate_lv_excl_local(vg->cmd, lvl->lv))
return_0;
}
6 years, 10 months
master - clean-ups: remove unused var, add 'static' for local fn, adjust test
by Jonathan Brassow
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=1f57a5263e3ef2dd95e...
Commit: 1f57a5263e3ef2dd95e91ea39a40e45993d31028
Parent: ddb14b6b05e0f75a97ab8ab1ed99091268c239ba
Author: Jonathan Brassow <jbrassow(a)redhat.com>
AuthorDate: Wed Jun 14 14:49:42 2017 -0500
Committer: Jonathan Brassow <jbrassow(a)redhat.com>
CommitterDate: Wed Jun 14 14:49:42 2017 -0500
clean-ups: remove unused var, add 'static' for local fn, adjust test
For the test clean-up, I was providing too many devices to the first
command - possibly allowing it to allocate in the wrong place. I was
also not providing a device for the second command - virtually ensuring
the test was not performing correctly at times.
---
lib/metadata/raid_manip.c | 3 +--
test/shell/lvconvert-raid.sh | 4 ++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index 52def78..dca5ba3 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -2877,7 +2877,6 @@ static int _raid_allow_extraction(struct logical_volume *lv,
char *dev_health;
char *sync_action;
struct lv_segment *seg = first_seg(lv);
- struct cmd_context *cmd = lv->vg->cmd;
/* If in-sync or hanlding repairs, allow to proceed. */
if (_raid_in_sync(lv) || lv->vg->cmd->handles_missing_pvs)
@@ -6510,7 +6509,7 @@ has_enough_space:
* Returns: 1 if the state is detected, 0 otherwise.
* FIXME: would be better to return -1,0,1 to allow error report.
*/
-int _lv_raid_has_primary_failure_on_recover(struct logical_volume *lv)
+static int _lv_raid_has_primary_failure_on_recover(struct logical_volume *lv)
{
char *tmp_dev_health;
char *tmp_sync_action;
diff --git a/test/shell/lvconvert-raid.sh b/test/shell/lvconvert-raid.sh
index fba7864..6c05c67 100644
--- a/test/shell/lvconvert-raid.sh
+++ b/test/shell/lvconvert-raid.sh
@@ -228,9 +228,9 @@ done
# - DO allow removal of secondaries while syncing
###########################################
aux delay_dev $dev2 0 100
-lvcreate -aey -l 2 -n $lv1 $vg $dev1 $dev2
+lvcreate -aey -l 2 -n $lv1 $vg $dev1
lvconvert -y -m 1 $vg/$lv1 \
- --config 'global { mirror_segtype_default = "raid1" }'
+ --config 'global { mirror_segtype_default = "raid1" }' $dev2
lvs --noheadings -o attr $vg/$lv1 | grep '^[[:space:]]*r'
not lvconvert --yes -m 0 $vg/$lv1 $dev1
lvconvert --yes -m 0 $vg/$lv1 $dev2
6 years, 10 months
master - lvconvert: Disallow removal of primary when up-converting (recovering)
by Jonathan Brassow
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=ddb14b6b05e0f75a97a...
Commit: ddb14b6b05e0f75a97ab8ab1ed99091268c239ba
Parent: 4c0e908b0ac012a5517e61420ea1eccc6193c00a
Author: Jonathan Brassow <jbrassow(a)redhat.com>
AuthorDate: Wed Jun 14 08:41:05 2017 -0500
Committer: Jonathan Brassow <jbrassow(a)redhat.com>
CommitterDate: Wed Jun 14 08:41:05 2017 -0500
lvconvert: Disallow removal of primary when up-converting (recovering)
This patch ensures that under normal conditions (i.e. not during repair
operations) that users are prevented from removing devices that would
cause data loss.
When a RAID1 is undergoing its initial sync, it is ok to remove all but
one of the images because they have all existed since creation and
contain all the data written since the array was created. OTOH, if the
RAID1 was created as a result of an up-convert from linear, it is very
important not to let the user remove the primary image (the source of
all the data). They should be allowed to remove any devices they want
and as many as they want as long as one original (primary) device is left
during a "recover" (aka up-convert).
This fixes bug 1461187 and includes the necessary regression tests.
---
lib/metadata/raid_manip.c | 85 ++++++++++++++++++++++++++++++++++++++++++
test/shell/lvconvert-raid.sh | 61 ++++++++++++++++++++++++++++++
2 files changed, 146 insertions(+), 0 deletions(-)
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index 0925594..52def78 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -2862,6 +2862,87 @@ static int _extract_image_components(struct lv_segment *seg, uint32_t idx,
}
/*
+ * _raid_allow_extraction
+ * @lv
+ * @extract_count
+ * @target_pvs
+ *
+ * returns: 0 if no, 1 if yes
+ */
+static int _raid_allow_extraction(struct logical_volume *lv,
+ int extract_count,
+ struct dm_list *target_pvs)
+{
+ int s, redundancy = 0;
+ char *dev_health;
+ char *sync_action;
+ struct lv_segment *seg = first_seg(lv);
+ struct cmd_context *cmd = lv->vg->cmd;
+
+ /* If in-sync or hanlding repairs, allow to proceed. */
+ if (_raid_in_sync(lv) || lv->vg->cmd->handles_missing_pvs)
+ return 1;
+
+ /*
+ * FIXME:
+ * Right now, we are primarily concerned with down-converting of
+ * RAID1 LVs, but parity RAIDs and RAID10 will also have to be
+ * considered.
+ * (e.g. It would not be good to allow extracting a dev from a
+ * stripe set while upconverting to RAID5/6.)
+ */
+ if (!segtype_is_raid1(seg->segtype))
+ return 1;
+
+ /*
+ * We can allow extracting images if the array is performing a
+ * sync operation as long as it is "recover" and the image is not
+ * a primary image or if "resync".
+ */
+ if (!lv_raid_sync_action(lv, &sync_action) ||
+ !lv_raid_dev_health(lv, &dev_health))
+ return_0;
+
+ if (!strcmp("idle", sync_action)) {
+ log_error(INTERNAL_ERROR
+ "RAID LV should not be out-of-sync and \"idle\"");
+ return 0;
+ }
+
+ if (!strcmp("resync", sync_action))
+ return 1;
+
+ /* If anything other than "recover" */
+ if (strcmp("recover", sync_action)) {
+ log_error("Unable to remove RAID image while array"
+ " is performing \"%s\"", sync_action);
+ return 0;
+ }
+
+ if (seg->area_count != strlen(dev_health)) {
+ log_error(INTERNAL_ERROR
+ "RAID LV area_count differs from number of health characters");
+ return 0;
+ }
+
+ for (s = 0; s < seg->area_count; s++)
+ if (dev_health[s] == 'A')
+ redundancy++;
+
+ for (s = 0; (s < seg->area_count) && extract_count; s++) {
+ if (!lv_is_on_pvs(seg_lv(seg, s), target_pvs) &&
+ !lv_is_on_pvs(seg_metalv(seg, s), target_pvs))
+ continue;
+ if ((dev_health[s] == 'A') && !--redundancy) {
+ log_error("Unable to remove all primary source devices");
+ return 0;
+ }
+ extract_count--;
+ }
+ return 1;
+}
+
+/*
* _raid_extract_images
* @lv
* @force: force a replacement in case of primary mirror leg
@@ -2892,6 +2973,10 @@ static int _raid_extract_images(struct logical_volume *lv,
struct segment_type *error_segtype;
extract = seg->area_count - new_count;
+
+ if (!_raid_allow_extraction(lv, extract, target_pvs))
+ return_0;
+
log_verbose("Extracting %u %s from %s.", extract,
(extract > 1) ? "images" : "image",
display_lvname(lv));
diff --git a/test/shell/lvconvert-raid.sh b/test/shell/lvconvert-raid.sh
index e173d66..fba7864 100644
--- a/test/shell/lvconvert-raid.sh
+++ b/test/shell/lvconvert-raid.sh
@@ -223,6 +223,67 @@ for i in 1 2 3 ; do
done
###########################################
+# Upconverted RAID1 should not allow loss of primary
+# - don't allow removal of primary while syncing
+# - DO allow removal of secondaries while syncing
+###########################################
+aux delay_dev $dev2 0 100
+lvcreate -aey -l 2 -n $lv1 $vg $dev1 $dev2
+lvconvert -y -m 1 $vg/$lv1 \
+ --config 'global { mirror_segtype_default = "raid1" }'
+lvs --noheadings -o attr $vg/$lv1 | grep '^[[:space:]]*r'
+not lvconvert --yes -m 0 $vg/$lv1 $dev1
+lvconvert --yes -m 0 $vg/$lv1 $dev2
+aux enable_dev $dev2
+lvremove -ff $vg
+
+###########################################
+# lvcreated RAID1 should allow all down-conversion
+# - DO allow removal of primary while syncing
+# - DO allow removal of secondaries while syncing
+###########################################
+aux delay_dev $dev2 0 100
+lvcreate --type raid1 -m 2 -aey -l 2 -n $lv1 $vg $dev1 $dev2 $dev3
+lvconvert --yes -m 1 $vg/$lv1 $dev3
+lvconvert --yes -m 0 $vg/$lv1 $dev1
+aux enable_dev $dev2
+lvremove -ff $vg
+
+###########################################
+# Converting from 2-way RAID1 to 3-way
+# - DO allow removal of one of primary sources
+# - Do not allow removal of all primary sources
+###########################################
+lvcreate --type raid1 -m 1 -aey -l 2 -n $lv1 $vg $dev1 $dev2
+aux wait_for_sync $vg $lv1
+aux delay_dev $dev3 0 100
+lvconvert --yes -m +1 $vg/$lv1 $dev3
+# should allow 1st primary to be removed
+lvconvert --yes -m -1 $vg/$lv1 $dev1
+# should NOT allow last primary to be removed
+not lvconvert --yes -m -1 $vg/$lv1 $dev2
+# should allow non-primary to be removed
+lvconvert --yes -m 0 $vg/$lv1 $dev3
+aux enable_dev $dev3
+lvremove -ff $vg
+
+###########################################
+# Converting from 2-way RAID1 to 3-way
+# - Should allow removal of two devices,
+# as long as they aren't both primary
+###########################################
+lvcreate --type raid1 -m 1 -aey -l 2 -n $lv1 $vg $dev1 $dev2
+aux wait_for_sync $vg $lv1
+aux delay_dev $dev3 0 100
+lvconvert --yes -m +1 $vg/$lv1 $dev3
+# should NOT allow both primaries to be removed
+not lvconvert -m 0 $vg/$lv1 $dev1 $dev2
+# should allow primary + non-primary
+lvconvert --yes -m 0 $vg/$lv1 $dev1 $dev3
+aux enable_dev $dev3
+lvremove -ff $vg
+
+###########################################
# Device Replacement Testing
###########################################
# RAID1: Replace up to n-1 devices - trying different combinations
6 years, 10 months
master - RAID (lvconvert/dmeventd): Cleanly handle primary failure during 'recover' op
by Jonathan Brassow
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=4c0e908b0ac012a5517...
Commit: 4c0e908b0ac012a5517e61420ea1eccc6193c00a
Parent: d34d2068ddf20b7d683ee06205c31ec673b32813
Author: Jonathan Brassow <jbrassow(a)redhat.com>
AuthorDate: Wed Jun 14 08:39:50 2017 -0500
Committer: Jonathan Brassow <jbrassow(a)redhat.com>
CommitterDate: Wed Jun 14 08:39:50 2017 -0500
RAID (lvconvert/dmeventd): Cleanly handle primary failure during 'recover' op
Add the checks necessary to distinguish the state of a RAID when the primary
source for syncing fails during the "recover" process.
It has been possible to hit this condition before (like when converting from
2-way RAID1 to 3-way and having the first two devices die during the "recover"
process). However, this condition is now more likely since we treat linear ->
RAID1 conversions as "recover" now - so it is especially important we cleanly
handle this condition.
---
daemons/dmeventd/plugins/raid/dmeventd_raid.c | 16 +++++++
lib/metadata/raid_manip.c | 60 +++++++++++++++++++++++++
2 files changed, 76 insertions(+), 0 deletions(-)
diff --git a/daemons/dmeventd/plugins/raid/dmeventd_raid.c b/daemons/dmeventd/plugins/raid/dmeventd_raid.c
index 4f204bf..afeac28 100644
--- a/daemons/dmeventd/plugins/raid/dmeventd_raid.c
+++ b/daemons/dmeventd/plugins/raid/dmeventd_raid.c
@@ -58,6 +58,22 @@ static int _process_raid_event(struct dso_state *state, char *params, const char
dead = 1;
}
+ /*
+ * if we are converting from non-RAID to RAID (e.g. linear -> raid1)
+ * and too many original devices die, such that we cannot continue
+ * the "recover" operation, the sync action will go to "idle", the
+ * unsynced devs will remain at 'a', and the original devices will
+ * NOT SWITCH TO 'D', but will remain at 'A' - hoping to be revived.
+ *
+ * This is simply the way the kernel works...
+ */
+ if (!strcmp(status->sync_action, "idle") &&
+ strchr(status->dev_health, 'a')) {
+ log_error("Primary sources for new RAID, %s, have failed.",
+ device);
+ dead = 1; /* run it through LVM repair */
+ }
+
if (dead) {
if (status->insync_regions < status->total_regions) {
if (!state->warned) {
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index ac0b8f1..0925594 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -6408,6 +6408,39 @@ has_enough_space:
}
/*
+ * _lv_raid_has_primary_failure_on_recover
+ * @lv
+ *
+ * The kernel behaves strangely in the presense of a primary failure
+ * during a "recover" sync operation. It's not technically a bug, I
+ * suppose, but the output of the status line can make it difficult
+ * to determine that we are in this state. The sync ratio will be
+ * 100% and the sync action will be "idle", but the health characters
+ * will be e.g. "Aaa" or "Aa", where the 'A' is the dead
+ * primary source that cannot be marked dead by the kernel b/c
+ * it is the only source for the remainder of data.
+ *
+ * This function helps to detect that condition.
+ *
+ * Returns: 1 if the state is detected, 0 otherwise.
+ * FIXME: would be better to return -1,0,1 to allow error report.
+ */
+int _lv_raid_has_primary_failure_on_recover(struct logical_volume *lv)
+{
+ char *tmp_dev_health;
+ char *tmp_sync_action;
+
+ if (!lv_raid_sync_action(lv, &tmp_sync_action) ||
+ !lv_raid_dev_health(lv, &tmp_dev_health))
+ return_0;
+
+ if (!strcmp(tmp_sync_action, "idle") && strchr(tmp_dev_health, 'a'))
+ return 1;
+
+ return 0;
+}
+
+/*
* Helper:
*
* _lv_raid_rebuild_or_replace
@@ -6458,11 +6491,38 @@ static int _lv_raid_rebuild_or_replace(struct logical_volume *lv,
}
if (!_raid_in_sync(lv)) {
+ /*
+ * FIXME: There is a bug in the kernel that prevents 'rebuild'
+ * from being specified when the array is not in-sync.
+ * There are conditions where this should be allowed,
+ * but only when we are doing a repair - as indicated by
+ * 'lv->vg->cmd->handles_missing_pvs'. The above
+ * conditional should be:
+ (!lv->vg->cmd->handles_missing_pvs && !_raid_in_sync(lv))
+ */
log_error("Unable to replace devices in %s while it is "
"not in-sync.", display_lvname(lv));
return 0;
}
+ if (_lv_raid_has_primary_failure_on_recover(lv)) {
+ /*
+ * I hate having multiple error lines, but this
+ * seems to work best for syslog and CLI.
+ */
+ log_error("Unable to repair %s/%s. Source devices failed"
+ " before the RAID could synchronize.",
+ lv->vg->name, lv->name);
+ log_error("You should choose one of the following:");
+ log_error(" 1) deactivate %s/%s, revive failed "
+ "device, re-activate LV, and proceed.",
+ lv->vg->name, lv->name);
+ log_error(" 2) remove the LV (all data is lost).");
+ log_error(" 3) Seek expert advice to attempt to salvage any"
+ " data from remaining devices.");
+ return 0;
+ }
+
/*
* How many sub-LVs are being removed?
*/
6 years, 10 months