[mdadm/f15: 1/2] Backport memory leak fix for RAID5, and fix for mounting device during reshape
Jes Sorensen
jsorensen at fedoraproject.org
Tue Nov 22 10:08:06 UTC 2011
commit 82fbb33bf7748d0b21e0e36756345111dd8c511d
Author: Jes Sorensen <Jes.Sorensen at redhat.com>
Date: Mon Nov 21 17:01:10 2011 +0100
Backport memory leak fix for RAID5, and fix for mounting device during reshape
Resolves: bz755005 bz755009
Signed-off-by: Jes Sorensen <Jes.Sorensen at redhat.com>
...nused-handle-in-child-process-during-resh.patch | 84 +++++++++++++++
mdadm-3.2.2-Fix-serious-memory-leak.patch | 113 ++++++++++++++++++++
mdadm.spec | 13 ++-
3 files changed, 209 insertions(+), 1 deletions(-)
---
diff --git a/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch b/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
new file mode 100644
index 0000000..127b076
--- /dev/null
+++ b/mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
@@ -0,0 +1,84 @@
+From 9ad6f6e65a535f77f180e87393043a8ffcfb30d8 Mon Sep 17 00:00:00 2001
+From: Adam Kwolek <adam.kwolek at intel.com>
+Date: Wed, 26 Oct 2011 18:16:55 +0200
+Subject: [PATCH] FIX: Close unused handle in child process during reshape
+ restart
+
+When an array reshape (e.g. raid0->raid5 migration) is restarted during
+array assembly, the file system placed on this array cannot be mounted
+until the reshape is finished, due to a "busy" error.
+
+This happens when the reshape is executed on an array with external
+metadata and the array handle is inherited by the forked child process
+but not closed.
+
+Handle can't be closed before executing Grow_continue() because it is
+used later in code.
+
+Close the unused handle in the child process (in reshape_container()).
+This is similar to closing the fd handle in reshape_array() before
+calling manage_reshape()/child_monitor() in Grow.c:2290.
+
+Signed-off-by: Adam Kwolek <adam.kwolek at intel.com>
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ Grow.c | 13 +++++++++++--
+ 1 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/Grow.c b/Grow.c
+index 0e4dd10..93a69fd 100644
+--- a/Grow.c
++++ b/Grow.c
+@@ -1350,6 +1350,7 @@ static int reshape_array(char *container, int fd, char *devname,
+ char *backup_file, int quiet, int forked,
+ int restart);
+ static int reshape_container(char *container, char *devname,
++ int mdfd,
+ struct supertype *st,
+ struct mdinfo *info,
+ int force,
+@@ -1768,7 +1769,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
+ * number of devices (On-Line Capacity Expansion) must be
+ * performed at the level of the container
+ */
+- rv = reshape_container(container, devname, st, &info,
++ rv = reshape_container(container, devname, -1, st, &info,
+ force, backup_file, quiet, 0);
+ frozen = 0;
+ } else {
+@@ -2403,7 +2404,10 @@ release:
+ return 1;
+ }
+
++/* mdfd handle is passed to be closed in child process (after fork).
++ */
+ int reshape_container(char *container, char *devname,
++ int mdfd,
+ struct supertype *st,
+ struct mdinfo *info,
+ int force,
+@@ -2446,6 +2450,11 @@ int reshape_container(char *container, char *devname,
+ break;
+ }
+
++ /* close unused handle in child process
++ */
++ if (mdfd > -1)
++ close(mdfd);
++
+ while(1) {
+ /* For each member array with reshape_active,
+ * we need to perform the reshape.
+@@ -3571,7 +3580,7 @@
+ return 1;
+ st->ss->load_container(st, cfd, container);
+ close(cfd);
+- return reshape_container(container, NULL,
++ return reshape_container(container, NULL, mdfd,
+ st, info, 0, backup_file,
+ 0, 1);
+ }
+
+--
+1.7.7.3
+
diff --git a/mdadm-3.2.2-Fix-serious-memory-leak.patch b/mdadm-3.2.2-Fix-serious-memory-leak.patch
new file mode 100644
index 0000000..4b4137d
--- /dev/null
+++ b/mdadm-3.2.2-Fix-serious-memory-leak.patch
@@ -0,0 +1,113 @@
+From 758be4f1c9cda8eefb2fd241835521462196e16c Mon Sep 17 00:00:00 2001
+From: Lukasz Dorau <lukasz.dorau at intel.com>
+Date: Mon, 19 Sep 2011 13:26:05 +1000
+Subject: [PATCH] Fix serious memory leak
+
+During a reshape, the function restore_stripes() is called periodically,
+and each time the buffer stripe_buf (of size raid_disks*chunk_size)
+is allocated but never freed. This happens even on successful completion.
+For huge arrays this can consume the entire system memory
+(the leak can be on the order of gigabytes).
+
+Signed-off-by: Lukasz Dorau <lukasz.dorau at intel.com>
+Signed-off-by: NeilBrown <neilb at suse.de>
+---
+ restripe.c | 50 +++++++++++++++++++++++++++++++++-----------------
+ 1 files changed, 33 insertions(+), 17 deletions(-)
+
+diff --git a/restripe.c b/restripe.c
+index 9c83e2e..00e7a82 100644
+--- a/restripe.c
++++ b/restripe.c
+@@ -687,6 +687,7 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ char **stripes = malloc(raid_disks * sizeof(char*));
+ char **blocks = malloc(raid_disks * sizeof(char*));
+ int i;
++ int rv;
+
+ int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
+
+@@ -704,11 +705,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+
+ if (stripe_buf == NULL || stripes == NULL || blocks == NULL
+ || zero == NULL) {
+- free(stripe_buf);
+- free(stripes);
+- free(blocks);
+- free(zero);
+- return -2;
++ rv = -2;
++ goto abort;
+ }
+ for (i = 0; i < raid_disks; i++)
+ stripes[i] = stripe_buf + i * chunk_size;
+@@ -717,20 +715,26 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ unsigned long long offset;
+ int disk, qdisk;
+ int syndrome_disks;
+- if (length < len)
+- return -3;
++ if (length < len) {
++ rv = -3;
++ goto abort;
++ }
+ for (i = 0; i < data_disks; i++) {
+ int disk = geo_map(i, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (src_buf == NULL) {
+ /* read from file */
+- if (lseek64(source,
+- read_offset, 0) != (off64_t)read_offset)
+- return -1;
++ if (lseek64(source, read_offset, 0) !=
++ (off64_t)read_offset) {
++ rv = -1;
++ goto abort;
++ }
+ if (read(source,
+ stripes[disk],
+- chunk_size) != chunk_size)
+- return -1;
++ chunk_size) != chunk_size) {
++ rv = -1;
++ goto abort;
++ }
+ } else {
+ /* read from input buffer */
+ memcpy(stripes[disk],
+@@ -782,15 +786,27 @@ int restore_stripes(int *dest, unsigned long long *offsets,
+ }
+ for (i=0; i < raid_disks ; i++)
+ if (dest[i] >= 0) {
+- if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
+- return -1;
+- if (write(dest[i], stripes[i], chunk_size) != chunk_size)
+- return -1;
++ if (lseek64(dest[i],
++ offsets[i]+offset, 0) < 0) {
++ rv = -1;
++ goto abort;
++ }
++ if (write(dest[i], stripes[i],
++ chunk_size) != chunk_size) {
++ rv = -1;
++ goto abort;
++ }
+ }
+ length -= len;
+ start += len;
+ }
+- return 0;
++ rv = 0;
++
++abort:
++ free(stripe_buf);
++ free(stripes);
++ free(blocks);
++ return rv;
+ }
+
+ #ifdef MAIN
+--
+1.7.7.3
+
diff --git a/mdadm.spec b/mdadm.spec
index ef7215c..5012ede 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,7 +1,7 @@
Summary: The mdadm program controls Linux md devices (software RAID arrays)
Name: mdadm
Version: 3.2.2
-Release: 14%{?dist}
+Release: 15%{?dist}
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.bz2
Source1: mdmonitor.init
Source2: raid-check
@@ -25,6 +25,8 @@ Patch10: mdadm-3.2.2-Fix-component-size-checks-in-validate_super0.patch
Patch11: mdadm-3.2.2-Discourage-large-devices-from-being-added-to-0.90-ar.patch
Patch12: mdadm-3.2.2-Grow-refuse-to-grow-a-0.90-array-beyond-2TB.patch
Patch13: mdadm-3.2.2-super0-fix-overflow-when-checking-max-size.patch
+Patch14: mdadm-3.2.2-Fix-serious-memory-leak.patch
+Patch15: mdadm-3.2.2-FIX-Close-unused-handle-in-child-process-during-resh.patch
Patch19: mdadm-3.1.3-udev.patch
Patch20: mdadm-2.5.2-static.patch
URL: http://www.kernel.org/pub/linux/utils/raid/mdadm/
@@ -73,6 +75,8 @@ is not used as the system init process.
%patch11 -p1 -b .discourage
%patch12 -p1 -b .grow
%patch13 -p1 -b .overflow
+%patch14 -p1 -b .memleak
+%patch15 -p1 -b .reshape
%patch19 -p1 -b .udev
%patch20 -p1 -b .static
@@ -147,6 +151,13 @@ fi
%{_initrddir}/*
%changelog
+* Mon Nov 21 2011 Jes Sorensen <Jes.Sorensen at redhat.com> - 3.2.2-15
+- Backport upstream fix for memory leak that can prevent migration to
+ RAID5 from completing.
+- Backport upstream fix for a bug that prevented mounting a device while
+ it is in the process of reshaping
+- Resolves: bz755005 bz755009
+
* Wed Nov 9 2011 Jes Sorensen <Jes.Sorensen at redhat.com> - 3.2.2-14
- Backport upstream fixes to prevent growing v0.90 metadata raid out
of supported size.
More information about the scm-commits
mailing list