[mdadm/f15: 1/2] Backport memory leak fix for RAID5, and fix for mounting device during reshape

Tue Nov 22 10:08:06 UTC 2011

commit 82fbb33bf7748d0b21e0e36756345111dd8c511d
Author: Jes Sorensen <Jes.Sorensen at redhat.com>
Date:   Mon Nov 21 17:01:10 2011 +0100

    Backport memory leak fix for RAID5, and fix for mounting device during reshape
    
    Resolves: bz755005 bz755009
    
    Signed-off-by: Jes Sorensen <Jes.Sorensen at redhat.com>

 ...nused-handle-in-child-process-during-resh.patch |   84 +++++++++++++++
 mdadm-3.2.2-Fix-serious-memory-leak.patch          |  113 ++++++++++++++++++++
 mdadm.spec                                         |   13 ++-
 3 files changed, 209 insertions(+), 1 deletions(-)
---

diff --git a/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch b/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
new file mode 100644
index 0000000..127b076
--- /dev/null
+++ b/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
@@ -0,0 +1,84 @@
+From 9ad6f6e65a535f77f180e87393043a8ffcfb30d8 Mon Sep 17 00:00:00 2001
+From: Adam Kwolek <adam.kwolek at intel.com>
+Date: Wed, 26 Oct 2011 18:16:55 +0200
+Subject: [PATCH] FIX: Close unused handle in child process during reshape
+ restart
+
+When an array reshape (e.g. raid0->raid5 migration) is restarted during
+array assembly, a file system placed on this array cannot be mounted
+until the reshape has finished, because mounting fails with a "busy" error.
+
+This happens when the reshape is executed on an array with external
+metadata: the array handle is cloned /forked/ into the child process
+environment but is not closed there.
+
+Handle can't be closed before executing Grow_continue() because it is
+used later in code.
+
+Close the unused handle in the child process /reshape_container()/.
+This is similar to closing the fd handle in reshape_array() before
+calling manage_reshape()/child_monitor() in Grow.c:2290.
+
+Signed-off-by: Adam Kwolek <adam.kwolek at intel.com>
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ Grow.c |   13 +++++++++++--
+ 1 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/Grow.c b/Grow.c
+index 0e4dd10..93a69fd 100644
+--- a/Grow.c
++++ b/Grow.c
+@@ -1350,6 +1350,7 @@ static int reshape_array(char *container, int fd, char *devname,
+ 			 char *backup_file, int quiet, int forked,
+ 			 int restart);
+ static int reshape_container(char *container, char *devname,
++			     int mdfd,
+ 			     struct supertype *st, 
+ 			     struct mdinfo *info,
+ 			     int force,
+@@ -1768,7 +1769,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
+ 		 * number of devices (On-Line Capacity Expansion) must be
+ 		 * performed at the level of the container
+ 		 */
+-		rv = reshape_container(container, devname, st, &info,
++		rv = reshape_container(container, devname, -1, st, &info,
+ 				       force, backup_file, quiet, 0);
+ 		frozen = 0;
+ 	} else {
+@@ -2403,7 +2404,10 @@ release:
+ 	return 1;
+ }
+ 
++/* mdfd handle is passed to be closed in child process (after fork).
++ */
+ int reshape_container(char *container, char *devname,
++		      int mdfd,
+ 		      struct supertype *st, 
+ 		      struct mdinfo *info,
+ 		      int force,
+@@ -2446,6 +2450,11 @@ int reshape_container(char *container, char *devname,
+ 		break;
+ 	}
+ 
++	/* close unused handle in child process
++	 */
++	if (mdfd > -1)
++		close(mdfd);
++
+ 	while(1) {
+ 		/* For each member array with reshape_active,
+ 		 * we need to perform the reshape.
+@@ -3571,7 +3580,7 @@
+ 				return 1;
+ 			st->ss->load_container(st, cfd, container);
+ 			close(cfd);
+-			return reshape_container(container, NULL,
++			return reshape_container(container, NULL, mdfd,
+ 						 st, info, 0, backup_file,
+ 						 0, 1);
+ 		}
+
+-- 
+1.7.7.3
+
diff --git a/mdadm-3.2.2-Fix-serious-memory-leak.patch b/mdadm-3.2.2-Fix-serious-memory-leak.patch
new file mode 100644
index 0000000..4b4137d
--- /dev/null
+++ b/mdadm-3.2.2-Fix-serious-memory-leak.patch
@@ -0,0 +1,113 @@
+From 758be4f1c9cda8eefb2fd241835521462196e16c Mon Sep 17 00:00:00 2001
+From: Lukasz Dorau <lukasz.dorau at intel.com>
+Date: Mon, 19 Sep 2011 13:26:05 +1000
+Subject: [PATCH] Fix serious memory leak
+
+During a reshape, the function restore_stripes() is called periodically,
+and every time the buffer stripe_buf (of size raid_disks*chunk_size)
+is allocated but never freed. This happens even on successful completion.
+For huge arrays this can exhaust the entire system memory
+(on the order of gigabytes).
+
+Signed-off-by: Lukasz Dorau <lukasz.dorau at intel.com>
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ restripe.c |   50 +++++++++++++++++++++++++++++++++-----------------
+ 1 files changed, 33 insertions(+), 17 deletions(-)
+
+diff --git a/restripe.c b/restripe.c
+index 9c83e2e..00e7a82 100644
+--- a/restripe.c
++++ b/restripe.c
+@@ -687,6 +687,7 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ 	char **stripes = malloc(raid_disks * sizeof(char*));
+ 	char **blocks = malloc(raid_disks * sizeof(char*));
+ 	int i;
++	int rv;
+ 
+ 	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
+ 
+@@ -704,11 +705,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ 
+ 	if (stripe_buf == NULL || stripes == NULL || blocks == NULL
+ 	    || zero == NULL) {
+-		free(stripe_buf);
+-		free(stripes);
+-		free(blocks);
+-		free(zero);
+-		return -2;
++		rv = -2;
++		goto abort;
+ 	}
+ 	for (i = 0; i < raid_disks; i++)
+ 		stripes[i] = stripe_buf + i * chunk_size;
+@@ -717,20 +715,26 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ 		unsigned long long offset;
+ 		int disk, qdisk;
+ 		int syndrome_disks;
+-		if (length < len)
+-			return -3;
++		if (length < len) {
++			rv = -3;
++			goto abort;
++		}
+ 		for (i = 0; i < data_disks; i++) {
+ 			int disk = geo_map(i, start/chunk_size/data_disks,
+ 					   raid_disks, level, layout);
+ 			if (src_buf == NULL) {
+ 				/* read from file */
+-				if (lseek64(source,
+-					read_offset, 0) != (off64_t)read_offset)
+-					return -1;
++				if (lseek64(source, read_offset, 0) !=
++					 (off64_t)read_offset) {
++					rv = -1;
++					goto abort;
++				}
+ 				if (read(source,
+ 					 stripes[disk],
+-					 chunk_size) != chunk_size)
+-					return -1;
++					 chunk_size) != chunk_size) {
++					rv = -1;
++					goto abort;
++				}
+ 			} else {
+ 				/* read from input buffer */
+ 				memcpy(stripes[disk],
+@@ -782,15 +786,27 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ 		}
+ 		for (i=0; i < raid_disks ; i++)
+ 			if (dest[i] >= 0) {
+-				if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
+-					return -1;
+-				if (write(dest[i], stripes[i], chunk_size) != chunk_size)
+-					return -1;
++				if (lseek64(dest[i],
++					 offsets[i]+offset, 0) < 0) {
++					rv = -1;
++					goto abort;
++				}
++				if (write(dest[i], stripes[i],
++					 chunk_size) != chunk_size) {
++					rv = -1;
++					goto abort;
++				}
+ 			}
+ 		length -= len;
+ 		start += len;
+ 	}
+-	return 0;
++	rv = 0;
++
++abort:
++	free(stripe_buf);
++	free(stripes);
++	free(blocks);
++	return rv;
+ }
+ 
+ #ifdef MAIN
+-- 
+1.7.7.3
+
diff --git a/mdadm.spec b/mdadm.spec
index ef7215c..5012ede 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.2.2
-Release:     14%{?dist}
+Release:     15%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.bz2
 Source1:     mdmonitor.init
 Source2:     raid-check
@@ -25,6 +25,8 @@ Patch10:     mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch
 Patch11:     mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch
 Patch12:     mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch
 Patch13:     mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch
+Patch14:     mdadm-3.2.2-Fix-serious-memory-leak.patch
+Patch15:     mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
 Patch19:     mdadm-3.1.3-udev.patch
 Patch20:     mdadm-2.5.2-static.patch
 URL:         http://www.kernel.org/pub/linux/utils/raid/mdadm/
@@ -73,6 +75,8 @@ is not used as the system init process.
 %patch11 -p1 -b .discourage
 %patch12 -p1 -b .grow
 %patch13 -p1 -b .overflow
+%patch14 -p1 -b .memleak
+%patch15 -p1 -b .reshape
 %patch19 -p1 -b .udev
 %patch20 -p1 -b .static
 
@@ -147,6 +151,13 @@ fi
 %{_initrddir}/*
 
 %changelog
+* Mon Nov 21 2011 Jes Sorensen <Jes.Sorensen at redhat.com> - 3.2.2-15
+- Backport upstream fix for memory leak that can prevent migration to
+  RAID5 from completing.
+- Backport upstream fix for a bug that prevented mounting a device
+  while it is in the process of reshaping
+- Resolves: bz755005 bz755009
+
 * Wed Nov 9 2011 Jes Sorensen <Jes.Sorensen at redhat.com> - 3.2.2-14
 - Backport upstream fixes to prevent growing v0.90 metadata raid out
   of supported size.