[mdadm/f15: 2/3] Fix problem with 0.9 metadata and > 2TB drives + systemd script fix

Jes Sorensen jsorensen at fedoraproject.org
Wed Nov 9 17:54:35 UTC 2011


commit d01dbca1865fd15b2af2b0e241d3fb35aab5ef6c
Author: Jes Sorensen <Jes.Sorensen at redhat.com>
Date:   Wed Nov 9 17:58:29 2011 +0100

    Fix problem with 0.9 metadata and > 2TB drives + systemd script fix
    
    Signed-off-by: Jes Sorensen <Jes.Sorensen at redhat.com>

 ...large-devices-from-being-added-to-0.90-ar.patch |  163 ++++++++++++++++++++
 ...-component-size-checks-in-validate_super0.patch |   76 +++++++++
 ...ow-refuse-to-grow-a-0.90-array-beyond-2TB.patch |   77 +++++++++
 ...uper0-fix-overflow-when-checking-max-size.patch |   29 ++++
 mdadm.spec                                         |   19 ++-
 5 files changed, 362 insertions(+), 2 deletions(-)
---
diff --git a/mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch b/mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch
new file mode 100644
index 0000000..b3884e1
--- /dev/null
+++ b/mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch
@@ -0,0 +1,163 @@
+From 11b391ece9fa284a151362537af093aa44883696 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb at suse.de>
+Date: Thu, 8 Sep 2011 13:05:31 +1000
+Subject: [PATCH 2/3] Discourage large devices from being added to 0.90
+ arrays.
+
+0.90 arrays can only use up to 4TB per device.  So when a larger
+device is added, complain a bit.  Still allow it if --force is given
+as there could be a valid use.
+
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ Grow.c        |    2 +-
+ Incremental.c |    8 ++++----
+ Manage.c      |   29 +++++++++++++++++++++++++----
+ mdadm.c       |    3 ++-
+ mdadm.h       |    2 +-
+ 5 files changed, 33 insertions(+), 11 deletions(-)
+
+diff --git a/Grow.c b/Grow.c
+index 1aab113..048351d 100644
+--- a/Grow.c
++++ b/Grow.c
+@@ -1893,7 +1893,7 @@ static int reshape_array(char *container, int fd, char *devname,
+ 	 */
+ 	if (devlist)
+ 		Manage_subdevs(devname, fd, devlist, !quiet,
+-			       0,NULL);
++			       0,NULL, 0);
+ 
+ 	if (reshape.backup_blocks == 0) {
+ 		/* No restriping needed, but we might need to impose
+diff --git a/Incremental.c b/Incremental.c
+index 09cdd9b..791ad85 100644
+--- a/Incremental.c
++++ b/Incremental.c
+@@ -1035,7 +1035,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ 			close(dfd);
+ 			*dfdp = -1;
+ 			rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
+-					     -1, 0, NULL);
++					     -1, 0, NULL, 0);
+ 			close(mdfd);
+ 		}
+ 		if (verbose > 0) {
+@@ -1666,15 +1666,15 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
+ 				if (subfd >= 0) {
+ 					Manage_subdevs(memb->dev, subfd,
+ 						       &devlist, verbose, 0,
+-						       NULL);
++						       NULL, 0);
+ 					close(subfd);
+ 				}
+ 			}
+ 		free_mdstat(mdstat);
+ 	} else
+-		Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
++		Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0);
+ 	devlist.disposition = 'r';
+-	rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
++	rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0);
+ 	close(mdfd);
+ 	free_mdstat(ent);
+ 	return rv;
+diff --git a/Manage.c b/Manage.c
+index 66d6978..1b2b75a 100644
+--- a/Manage.c
++++ b/Manage.c
+@@ -371,7 +371,7 @@ int Manage_resize(char *devname, int fd, long long size, int raid_disks)
+ 
+ int Manage_subdevs(char *devname, int fd,
+ 		   struct mddev_dev *devlist, int verbose, int test,
+-		   char *update)
++		   char *update, int force)
+ {
+ 	/* do something to each dev.
+ 	 * devmode can be
+@@ -632,6 +632,27 @@ int Manage_subdevs(char *devname, int fd,
+ 				continue;
+ 			}
+ 
++			if (tst->ss->validate_geometry(
++				    tst, array.level, array.layout,
++				    array.raid_disks, NULL,
++				    ldsize >> 9, NULL, NULL, 0) == 0) {
++				if (!force) {
++					fprintf(stderr, Name
++						": %s is larger than %s can "
++						"effectively use.\n"
++						"       Add --force is you "
++						"really wan to add this device.\n",
++						add_dev, devname);
++					close(tfd);
++					return 1;
++				}
++				fprintf(stderr, Name
++					": %s is larger than %s can "
++					"effectively use.\n"
++					"       Adding anyway as --force "
++					"was given.\n",
++					add_dev, devname);
++			}
+ 			if (!tst->ss->external &&
+ 			    array.major_version == 0 &&
+ 			    md_get_version(fd)%100 < 2) {
+@@ -1188,9 +1209,9 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
+ 	sprintf(devname, "%d:%d", major(devid), minor(devid));
+ 
+ 	devlist.disposition = 'r';
+-	if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL) == 0) {
++	if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
+ 		devlist.disposition = 'a';
+-		if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL) == 0) {
++		if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+ 			/* make sure manager is aware of changes */
+ 			ping_manager(to_devname);
+ 			ping_manager(from_devname);
+@@ -1198,7 +1219,7 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
+ 			close(fd2);
+ 			return 1;
+ 		}
+-		else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL);
++		else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+ 	}
+ 	close(fd1);
+ 	close(fd2);
+diff --git a/mdadm.c b/mdadm.c
+index fb51051..4b817ab 100644
+--- a/mdadm.c
++++ b/mdadm.c
+@@ -609,6 +609,7 @@ int main(int argc, char *argv[])
+ 		case O(ASSEMBLE,Force): /* force assembly */
+ 		case O(MISC,'f'): /* force zero */
+ 		case O(MISC,Force): /* force zero */
++		case O(MANAGE,Force): /* add device which is too large */
+ 			force=1;
+ 			continue;
+ 
+@@ -1202,7 +1203,7 @@ int main(int argc, char *argv[])
+ 		if (!rv && devs_found>1)
+ 			rv = Manage_subdevs(devlist->devname, mdfd,
+ 					    devlist->next, verbose-quiet, test,
+-					    update);
++					    update, force);
+ 		if (!rv && readonly < 0)
+ 			rv = Manage_ro(devlist->devname, mdfd, readonly);
+ 		if (!rv && runstop)
+diff --git a/mdadm.h b/mdadm.h
+index 8bd0077..d616966 100644
+--- a/mdadm.h
++++ b/mdadm.h
+@@ -1010,7 +1010,7 @@ extern int Manage_runstop(char *devname, int fd, int runstop, int quiet);
+ extern int Manage_resize(char *devname, int fd, long long size, int raid_disks);
+ extern int Manage_subdevs(char *devname, int fd,
+ 			  struct mddev_dev *devlist, int verbose, int test,
+-			  char *update);
++			  char *update, int force);
+ extern int autodetect(void);
+ extern int Grow_Add_device(char *devname, int fd, char *newdev);
+ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
+-- 
+1.7.6.4
+
diff --git a/mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch b/mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch
new file mode 100644
index 0000000..5206d40
--- /dev/null
+++ b/mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch
@@ -0,0 +1,76 @@
+From 01619b481883926f13da2b1b88f3125359a6a08b Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb at suse.de>
+Date: Thu, 8 Sep 2011 12:20:36 +1000
+Subject: [PATCH 1/3] Fix component size checks in validate_super0.
+
+A 0.90 array can use at most 4TB of each device - 2TB between
+2.6.39 and 3.1 due to a kernel bug.
+
+The test for this in validate_super0 is very wrong.  'size' is sectors
+and the number it is compared against is just confusing.
+
+So fix it all up and correct the spelling of terabytes and remove
+a second redundant test on 'size'.
+
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ super0.c |   14 ++++++++++----
+ util.c   |    2 +-
+ 2 files changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/super0.c b/super0.c
+index 4a165f9..62c4ff0 100644
+--- a/super0.c
++++ b/super0.c
+@@ -1115,6 +1115,13 @@ static int validate_geometry0(struct supertype *st, int level,
+ {
+ 	unsigned long long ldsize;
+ 	int fd;
++	unsigned int tbmax = 4;
++
++	/* prior to linux 3.1, a bug limits usable device size to 2TB.
++	 * It was introduced in 2.6.29, but we won't worry about that detail
++	 */
++	if (get_linux_version() < 3001000)
++		tbmax = 2;
+ 
+ 	if (level == LEVEL_CONTAINER) {
+ 		if (verbose)
+@@ -1127,9 +1134,10 @@ static int validate_geometry0(struct supertype *st, int level,
+ 				MD_SB_DISKS);
+ 		return 0;
+ 	}
+-	if (size > (0x7fffffffULL<<9)) {
++	if (size >= tbmax * 1024*1024*1024*2ULL) {
+ 		if (verbose)
+-			fprintf(stderr, Name ": 0.90 metadata supports at most 2 terrabytes per device\n");
++			fprintf(stderr, Name ": 0.90 metadata supports at most "
++				"%d terabytes per device\n", tbmax);
+ 		return 0;
+ 	}
+ 	if (chunk && *chunk == UnSet)
+@@ -1154,8 +1162,6 @@ static int validate_geometry0(struct supertype *st, int level,
+ 
+ 	if (ldsize < MD_RESERVED_SECTORS * 512)
+ 		return 0;
+-	if (size > (0x7fffffffULL<<9))
+-		return 0;
+ 	*freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9);
+ 	return 1;
+ }
+diff --git a/util.c b/util.c
+index e68d55f..0ea7e0d 100644
+--- a/util.c
++++ b/util.c
+@@ -640,7 +640,7 @@ char *human_size(long long bytes)
+ 	 * We allow upto 2048Megabytes before converting to
+ 	 * gigabytes, as that shows more precision and isn't
+ 	 * too large a number.
+-	 * Terrabytes are not yet handled.
++	 * Terabytes are not yet handled.
+ 	 */
+ 
+ 	if (bytes < 5000*1024)
+-- 
+1.7.6.4
+
diff --git a/mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch b/mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch
new file mode 100644
index 0000000..4785af9
--- /dev/null
+++ b/mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch
@@ -0,0 +1,77 @@
+From 20a4675688e0384a1b4eac61b05f60fbf7747df9 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb at suse.de>
+Date: Thu, 8 Sep 2011 13:08:51 +1000
+Subject: [PATCH 3/3] Grow: refuse to grow a 0.90 array beyond 2TB
+
+A kernel bug makes handling for arrays using more than 2TB per device
+incorrect, and the kernel doesn't stop an array from growing beyond
+any limit.
+This is fixed in 3.1
+
+So prior to 3.1, make sure not to ask for an array to grow bigger than
+2TB per device.
+
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ Grow.c |   37 ++++++++++++++++++++++++++++++++++---
+ 1 files changed, 34 insertions(+), 3 deletions(-)
+
+diff --git a/Grow.c b/Grow.c
+index 048351d..17d14b6 100644
+--- a/Grow.c
++++ b/Grow.c
+@@ -1446,6 +1446,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
+ 	/* ========= set size =============== */
+ 	if (size >= 0 && (size == 0 || size != array.size)) {
+ 		long long orig_size = get_component_size(fd)/2;
++		long long min_csize;
+ 		struct mdinfo *mdi;
+ 
+ 		if (orig_size == 0)
+@@ -1461,10 +1462,40 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
+ 		/* Update the size of each member device in case
+ 		 * they have been resized.  This will never reduce
+ 		 * below the current used-size.  The "size" attribute
+-		 * understand '0' to mean 'max'.
++		 * understands '0' to mean 'max'.
+ 		 */
+-		for (mdi = sra->devs; mdi; mdi = mdi->next)
+-			sysfs_set_num(sra, mdi, "size", size);
++		min_csize = 0;
++		for (mdi = sra->devs; mdi; mdi = mdi->next) {
++			if (sysfs_set_num(sra, mdi, "size", size) < 0)
++				break;
++			if (array.not_persistent == 0 &&
++			    array.major_version == 0 &&
++			    get_linux_version() < 3001000) {
++				/* Dangerous to allow size to exceed 2TB */
++				unsigned long long csize;
++				if (sysfs_get_ll(sra, mdi, "size", &csize) == 0) {
++					if (csize >= 2ULL*1024*1024*1024)
++						csize = 2ULL*1024*1024*1024;
++					if ((min_csize == 0 || (min_csize
++								> (long long)csize)))
++						min_csize = csize;
++				}
++			}
++		}
++		if (min_csize && size > min_csize) {
++			fprintf(stderr, Name ": Cannot safely make this array "
++				"use more than 2TB per device on this kernel.\n");
++			rv = 1;
++			goto release;
++		}
++		if (min_csize && size == 0) {
++			/* Don't let the kernel choose a size - it will get
++			 * it wrong
++			 */
++			fprintf(stderr, Name ": Limited v0.90 array to "
++				"2TB per device\n");
++			size = min_csize;
++		}
+ 
+ 		array.size = size;
+ 		if (array.size != size) {
+-- 
+1.7.6.4
+
diff --git a/mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch b/mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch
new file mode 100644
index 0000000..5839509
--- /dev/null
+++ b/mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch
@@ -0,0 +1,29 @@
+From 9c8c121881769c9ce77fd7d981608c976aac8b5b Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb at suse.de>
+Date: Thu, 20 Oct 2011 13:14:26 +1100
+Subject: [PATCH] super0: fix overflow when checking max size.
+
+We need to force the multiplication to use ULL before the values
+get too big, else it overflows.  So move the "2ULL" to the start.
+
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ super0.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/super0.c b/super0.c
+index 3061ecf..f3d0c07 100644
+--- a/super0.c
++++ b/super0.c
+@@ -1142,7 +1142,7 @@ static int validate_geometry0(struct supertype *st, int level,
+ 				MD_SB_DISKS);
+ 		return 0;
+ 	}
+-	if (size >= tbmax * 1024*1024*1024*2ULL) {
++	if (size >= tbmax * 2ULL*1024*1024*1024) {
+ 		if (verbose)
+ 			fprintf(stderr, Name ": 0.90 metadata supports at most "
+ 				"%d terabytes per device\n", tbmax);
+-- 
+1.7.6.4
+
diff --git a/mdadm.spec b/mdadm.spec
index ed01a2e..ef7215c 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.2.2
-Release:     13%{?dist}
+Release:     14%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.bz2
 Source1:     mdmonitor.init
 Source2:     raid-check
@@ -21,6 +21,10 @@ Patch6:      mdadm-3.2.2-resync-does-not-continue-after-auto.patch
 Patch7:      mdadm-3.2.2-Remove-race-for-starting-container-devices.patch
 Patch8:      mdadm-3.2.2-no-sysfs-launch-container.patch
 Patch9:      mdadm-3.2.2-hold-lock-during-assemble.patch
+Patch10:     mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch
+Patch11:     mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch
+Patch12:     mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch
+Patch13:     mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch
 Patch19:     mdadm-3.1.3-udev.patch
 Patch20:     mdadm-2.5.2-static.patch
 URL:         http://www.kernel.org/pub/linux/utils/raid/mdadm/
@@ -65,6 +69,10 @@ is not used as the system init process.
 %patch7 -p1 -b .container
 %patch8 -p1 -b .sysfs
 %patch9 -p1 -b .assemble
+%patch10 -p1 -b .component
+%patch11 -p1 -b .discourage
+%patch12 -p1 -b .grow
+%patch13 -p1 -b .overflow
 %patch19 -p1 -b .udev
 %patch20 -p1 -b .static
 
@@ -102,7 +110,7 @@ fi
 
 %preun
 if [ $1 = 0 ]; then
-    /bin/systemctl --no-reload mdmonitor.service > /dev/null 2>&1 || :
+    /bin/systemctl --no-reload disable mdmonitor.service > /dev/null 2>&1 || :
     /bin/systemctl stop mdmonitor.service > /dev/null 2>&1 || :
 fi
 
@@ -139,6 +147,13 @@ fi
 %{_initrddir}/*
 
 %changelog
+* Wed Nov 9 2011 Jes Sorensen <Jes.Sorensen at redhat.com> - 3.2.2-14
+- Backport upstream fixes to prevent growing v0.90 metadata raid out
+  of supported size.
+- Add missing 'disable' argument to systemctl in preun script
+- Resolves: bz735306 (Fedora 15) bz748731 (Fedora 16) bz748732 (rawhide),
+  Resolves: bz751716
+
 * Wed Oct 26 2011 Fedora Release Engineering <rel-eng at lists.fedoraproject.org> - 3.2.2-13
 - Rebuilt for glibc bug#747377
 


More information about the scm-commits mailing list