[condor: 16/31] Update to latest trunk - no more mem leak, hopefully.

Brian Bockelman bbockelm at fedoraproject.org
Fri Mar 8 01:20:58 UTC 2013


commit 85aed4efb79fa91aa19e9886dcda6a45f18b7d65
Author: Brian Bockelman <bbockelm at cse.unl.edu>
Date:   Tue Aug 7 17:22:35 2012 -0500

    Update to latest trunk - no more mem leak, hopefully.

 condor.spec         |   14 ++-
 condor_oom_v3.patch |  342 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 354 insertions(+), 2 deletions(-)
---
diff --git a/condor.spec b/condor.spec
index 340486e..8a61fd8 100644
--- a/condor.spec
+++ b/condor.spec
@@ -33,12 +33,12 @@
 %define git_build 1
 # If building with git tarball, Fedora requests us to record the rev.  Use:
 # git log -1 --pretty=format:'%h'
-%define git_rev 2e636b9
+%define git_rev 70b9542
 
 Summary: Condor: High Throughput Computing
 Name: condor
 Version: 7.9.1
-%define condor_base_release 0.5
+%define condor_base_release 0.8
 %if %git_build
 	%define condor_release %condor_base_release.%{git_rev}.git
 %else
@@ -108,6 +108,9 @@ Patch5: condor-gahp.patch
 Patch6: cgahp_scaling.patch
 Patch7: condor-1605-v2.patch
 Patch8: lcmaps_env_in_init_script.patch
+# See gt3158
+Patch9: 0001-Apply-the-user-s-condor_config-last-rather-than-firs.patch
+Patch11: condor_oom_v3.patch
 
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
@@ -126,6 +129,7 @@ BuildRequires: /usr/include/expat.h
 BuildRequires: openldap-devel
 BuildRequires: /usr/include/ldap.h
 BuildRequires: latex2html
+BuildRequires: boost-devel
 
 # Globus GSI build requirements
 BuildRequires: globus-gssapi-gsi-devel
@@ -395,6 +399,9 @@ exit 0
 %patch5 -p1
 %patch6 -p1
 %patch7 -p1
+%patch9 -p1
+#%patch10 -p1
+%patch11 -p1
 
 %if %systemd
 cp %{SOURCE2} %{name}-tmpfiles.conf
@@ -1089,6 +1096,9 @@ fi
 %endif
 
 %changelog
+* Tue Jul 24 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.1-0.6.ceb6a0a.git
+- Fix per-user condor config to be more useful.  See gt3158
+
 * Mon Jul 16 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.1-0.5.ceb6a0a.git
 - Upstreaming of many of the custom patches.
 
diff --git a/condor_oom_v3.patch b/condor_oom_v3.patch
new file mode 100644
index 0000000..5f6db89
--- /dev/null
+++ b/condor_oom_v3.patch
@@ -0,0 +1,342 @@
+diff --git a/build/cmake/CondorConfigure.cmake b/build/cmake/CondorConfigure.cmake
+index e61fb4f..1094cb3 100644
+--- a/build/cmake/CondorConfigure.cmake
++++ b/build/cmake/CondorConfigure.cmake
+@@ -164,6 +164,7 @@ if( NOT WINDOWS)
+ 	check_function_exists("setlinebuf" HAVE_SETLINEBUF)
+ 	check_function_exists("snprintf" HAVE_SNPRINTF)
+ 	check_function_exists("snprintf" HAVE_WORKING_SNPRINTF)
++	check_function_exists("eventfd" HAVE_EVENTFD)
+ 
+ 	check_function_exists("stat64" HAVE_STAT64)
+ 	check_function_exists("_stati64" HAVE__STATI64)
+diff --git a/src/condor_includes/config.h.cmake b/src/condor_includes/config.h.cmake
+index b083945..3bd92b0 100644
+--- a/src/condor_includes/config.h.cmake
++++ b/src/condor_includes/config.h.cmake
+@@ -438,6 +438,9 @@
+ /* Define to 1 if you have the 'snprintf' function. (USED)*/
+ #cmakedefine HAVE_SNPRINTF 1
+ 
++/* Define to 1 if you have the 'eventfd' function. (USED)*/
++#cmakedefine HAVE_EVENTFD 1
++
+ /* Define to 1 if you have the 'stat64' function. (USED)*/
+ #cmakedefine HAVE_STAT64 1
+ 
+diff --git a/src/condor_starter.V6.1/vanilla_proc.cpp b/src/condor_starter.V6.1/vanilla_proc.cpp
+index 2e5538f..0246e5e 100644
+--- a/src/condor_starter.V6.1/vanilla_proc.cpp
++++ b/src/condor_starter.V6.1/vanilla_proc.cpp
+@@ -42,9 +42,16 @@
+ extern dynuser* myDynuser;
+ #endif
+ 
++#if defined(HAVE_EVENTFD)
++#include <sys/eventfd.h>
++#endif
++
+ extern CStarter *Starter;
+ 
+-VanillaProc::VanillaProc(ClassAd* jobAd) : OsProc(jobAd)
++VanillaProc::VanillaProc(ClassAd* jobAd) : OsProc(jobAd),
++	m_memory_limit(-1),
++	m_oom_fd(-1),
++	m_oom_efd(-1)
+ {
+ #if !defined(WIN32)
+ 	m_escalation_tid = -1;
+@@ -215,6 +222,12 @@ VanillaProc::StartJob()
+ 		}
+ 		fi.group_ptr = &tracking_gid;
+ 	}
++
++	// Increase the OOM score of this process; the child will inherit it.
++	// This way, the job will be heavily preferred to be killed over a normal process.
++	// OOM score is currently exponential - a score of 4 is a factor-16 increase in
++	// the OOM score.
++	setupOOMScore(4);
+ #endif
+ 
+ #if defined(HAVE_EXT_LIBCGROUP)
+@@ -406,6 +419,7 @@ VanillaProc::StartJob()
+ 			int MemMb;
+ 			if (MachineAd->LookupInteger(ATTR_MEMORY, MemMb)) {
+ 				uint64_t MemMb_big = MemMb;
++				m_memory_limit = MemMb_big;
+ 				climits.set_memory_limit_bytes(1024*1024*MemMb_big, mem_is_soft);
+ 			} else {
+ 				dprintf(D_ALWAYS, "Not setting memory soft limit in cgroup because "
+@@ -425,6 +439,14 @@ VanillaProc::StartJob()
+ 		} else {
+ 			dprintf(D_FULLDEBUG, "Invalid value of SlotWeight in machine ClassAd; ignoring.\n");
+ 		}
++		setupOOMEvent(cgroup);
++	}
++
++	// Now that the job is started, decrease the likelihood that the starter
++	// is killed instead of the job itself.
++	if (retval)
++	{
++		setupOOMScore(-4);
+ 	}
+ 
+ #endif
+@@ -611,5 +633,226 @@ VanillaProc::finishShutdownFast()
+ 	//   -gquinn, 2007-11-14
+ 	daemonCore->Kill_Family(JobPid);
+ 
++	if (m_oom_efd >= 0) {
++		dprintf(D_FULLDEBUG, "Closing event FD pipe in shutdown %d.\n", m_oom_efd);
++		daemonCore->Close_Pipe(m_oom_efd);
++		m_oom_efd = -1;
++	}
++	if (m_oom_fd >= 0) {
++		close(m_oom_fd);
++		m_oom_fd = -1;
++	}
++
+ 	return false;	// shutdown is pending, so return false
+ }
++
++/*
++ * This will be called when the event fd fires, indicating an OOM event.
++ */
++int
++VanillaProc::outOfMemoryEvent(int /* fd */)
++{
++	std::stringstream ss;
++	if (m_memory_limit >= 0) {
++		ss << "Job has gone over memory limit of " << m_memory_limit << " megabytes.";
++	} else {
++		ss << "Job has encountered an out-of-memory event.";
++	}
++	Starter->jic->holdJob(ss.str().c_str(), CONDOR_HOLD_CODE_JobOutOfResources, 0);
++
++	// this will actually clean up the job
++	if ( Starter->Hold( ) ) {
++		dprintf( D_FULLDEBUG, "All jobs were removed due to OOM event.\n" );
++		Starter->allJobsDone();
++	}
++
++	dprintf(D_FULLDEBUG, "Closing event FD pipe %d.\n", m_oom_efd);
++	daemonCore->Close_Pipe(m_oom_efd);
++	close(m_oom_fd);
++	m_oom_efd = -1;
++	m_oom_fd = -1;
++
++	Starter->ShutdownFast();
++
++	return 0;
++}
++
++int
++VanillaProc::setupOOMScore(int new_score)
++{
++#if !defined(LINUX)
++	if (new_score) // Done to suppress compiler warnings.
++		return 0;
++	return 0;
++#else
++	TemporaryPrivSentry sentry(PRIV_ROOT);
++	// oom_adj is deprecated on modern kernels and causes a deprecation warning when used.
++	int oom_score_fd = open("/proc/self/oom_score_adj", O_WRONLY);
++	if (oom_score_fd == -1) {
++		if (errno != ENOENT) {
++			dprintf(D_ALWAYS,
++				"Unable to open oom_score_adj for the starter: (errno=%u, %s)\n",
++				errno, strerror(errno));
++			return 1;
++		} else {
++			int oom_score_fd = open("/proc/self/oom_adj", O_WRONLY);
++			if (oom_score_fd == -1) {
++				dprintf(D_ALWAYS,
++					"Unable to open oom_adj for the starter: (errno=%u, %s)\n",
++					errno, strerror(errno));
++				return 1;
++			}
++		}
++	} else {
++		// oom_score_adj is linear; oom_adj was exponential.
++		if (new_score > 0)
++			new_score = 1 << new_score;
++		else
++			new_score = -(1 << -new_score);
++	}
++
++	std::stringstream ss;
++	ss << new_score;
++	std::string new_score_str = ss.str();
++        ssize_t nwritten = full_write(oom_score_fd, new_score_str.c_str(), new_score_str.length());
++	if (nwritten < 0) {
++		dprintf(D_ALWAYS,
++			"Unable to write into oom_adj file for the starter: (errno=%u, %s)\n",
++			errno, strerror(errno));
++		close(oom_score_fd);
++		return 1;
++	}
++	close(oom_score_fd);
++	return 0;
++#endif
++}
++
++int
++VanillaProc::setupOOMEvent(const std::string &cgroup_string)
++{
++#if !(defined(HAVE_EVENTFD) && defined(HAVE_EXT_LIBCGROUP))
++	return 0;
++#else
++	// Initialize the event descriptor
++	m_oom_efd = eventfd(0, EFD_CLOEXEC);
++	if (m_oom_efd == -1) {
++		dprintf(D_ALWAYS,
++			"Unable to create new event FD for starter: %u %s\n",
++			errno, strerror(errno));
++		return 1;
++	}
++
++	// Find the memcg location on disk
++	void * handle = NULL;
++	struct cgroup_mount_point mount_info;
++	int ret = cgroup_get_controller_begin(&handle, &mount_info);
++	std::stringstream oom_control;
++	std::stringstream event_control;
++	bool found_memcg = false;
++	while (ret == 0) {
++		if (strcmp(mount_info.name, MEMORY_CONTROLLER_STR) == 0) {
++			found_memcg = true;
++			oom_control << mount_info.path << "/";
++			event_control << mount_info.path << "/";
++			break;
++		}
++		cgroup_get_controller_next(&handle, &mount_info);
++	}
++	if (!found_memcg && (ret != ECGEOF)) {
++		dprintf(D_ALWAYS,
++			"Error while locating memcg controller for starter: %u %s\n",
++			ret, cgroup_strerror(ret));
++		return 1;
++	}
++	cgroup_get_controller_end(&handle);
++	if (found_memcg == false) {
++		dprintf(D_ALWAYS,
++			"Memcg is not available; OOM notification disabled for starter.\n");
++		return 1;
++	}
++
++	// Finish constructing the location of the control files
++	oom_control << cgroup_string << "/memory.oom_control";
++	std::string oom_control_str = oom_control.str();
++	event_control << cgroup_string << "/cgroup.event_control";
++	std::string event_control_str = event_control.str();
++
++	// Open the oom_control and event control files
++	TemporaryPrivSentry sentry(PRIV_ROOT);
++	m_oom_fd = open(oom_control_str.c_str(), O_RDONLY | O_CLOEXEC);
++	if (m_oom_fd == -1) {
++		dprintf(D_ALWAYS,
++			"Unable to open the OOM control file for starter: %u %s\n",
++			errno, strerror(errno));
++		return 1;
++	}
++	int event_ctrl_fd = open(event_control_str.c_str(), O_WRONLY | O_CLOEXEC);
++	if (event_ctrl_fd == -1) {
++		dprintf(D_ALWAYS,
++			"Unable to open event control for starter: %u %s\n",
++			errno, strerror(errno));
++		return 1;
++	}
++
++	// Inform Linux we will be handling the OOM events for this container.
++	int oom_fd2 = open(oom_control_str.c_str(), O_WRONLY | O_CLOEXEC);
++	if (oom_fd2 == -1) {
++		dprintf(D_ALWAYS,
++			"Unable to open the OOM control file for writing for starter: %u %s\n",
++			errno, strerror(errno));
++		return 1;
++	}
++	const char limits [] = "1";
++        ssize_t nwritten = full_write(oom_fd2, &limits, 1);
++	if (nwritten < 0) {
++		dprintf(D_ALWAYS,
++			"Unable to set OOM control to %s for starter: %u %s\n",
++				limits, errno, strerror(errno));
++		close(event_ctrl_fd);
++		close(oom_fd2);
++		return 1;
++	}
++	close(oom_fd2);
++
++	// Create the subscription string:
++	std::stringstream sub_ss;
++	sub_ss << m_oom_efd << " " << m_oom_fd;
++	std::string sub_str = sub_ss.str();
++
++	if ((nwritten = full_write(event_ctrl_fd, sub_str.c_str(), sub_str.size())) < 0) {
++		dprintf(D_ALWAYS,
++			"Unable to write into event control file for starter: %u %s\n",
++			errno, strerror(errno));
++		close(event_ctrl_fd);
++		return 1;
++	}
++	close(event_ctrl_fd);
++
++	// Fool DC into talking to the eventfd
++	int pipes[2]; pipes[0] = -1; pipes[1] = -1;
++	int fd_to_replace = -1;
++	if (daemonCore->Create_Pipe(pipes, true) == -1 || pipes[0] == -1) {
++		dprintf(D_ALWAYS, "Unable to create a DC pipe\n");
++		close(m_oom_efd);
++		m_oom_efd = -1;
++		close(m_oom_fd);
++		m_oom_fd = -1;
++		return 1;
++	}
++	if ( daemonCore->Get_Pipe_FD(pipes[0], &fd_to_replace) == -1 || fd_to_replace == -1) {
++		dprintf(D_ALWAYS, "Unable to lookup pipe's FD\n");
++		close(m_oom_efd); m_oom_efd = -1;
++		close(m_oom_fd); m_oom_fd = -1;
++		daemonCore->Close_Pipe(pipes[0]);
++		daemonCore->Close_Pipe(pipes[1]);
++	}
++	dup3(m_oom_efd, fd_to_replace, O_CLOEXEC);
++	close(m_oom_efd);
++	m_oom_efd = pipes[0];
++
+	// Inform DC we want to receive notifications from this FD.
++	daemonCore->Register_Pipe(pipes[0],"OOM event fd", static_cast<PipeHandlercpp>(&VanillaProc::outOfMemoryEvent),"OOM Event Handler",this,HANDLE_READ);
++	return 0;
++#endif
++}
++
+diff --git a/src/condor_starter.V6.1/vanilla_proc.h b/src/condor_starter.V6.1/vanilla_proc.h
+index d524cf5..90b4741 100644
+--- a/src/condor_starter.V6.1/vanilla_proc.h
++++ b/src/condor_starter.V6.1/vanilla_proc.h
+@@ -74,6 +74,15 @@ private:
+ #if !defined(WIN32)
+ 	int m_escalation_tid;
+ #endif
++
++	// Configure OOM killer for this job
++	int m_memory_limit; // Memory limit, in MB.
+	int m_oom_fd; // The file descriptor which receives events
++	int m_oom_efd; // The event FD to watch
++	int setupOOMScore(int new_score);
++	int outOfMemoryEvent(int fd);
++	int setupOOMEvent(const std::string & cgroup_string);
++
+ };
+ 
+ #endif
+diff --git a/src/condor_utils/condor_holdcodes.h b/src/condor_utils/condor_holdcodes.h
+index d788d6e..3083db3 100644
+--- a/src/condor_utils/condor_holdcodes.h
++++ b/src/condor_utils/condor_holdcodes.h
+@@ -128,4 +128,6 @@ const int CONDOR_HOLD_CODE_GlexecChownSandboxToCondor = 30;
+ 
+ const int CONDOR_HOLD_CODE_PrivsepChownSandboxToCondor = 31;
+ 
++const int CONDOR_HOLD_CODE_JobOutOfResources = 32;
++
+ #endif


More information about the scm-commits mailing list