[condor: 12/31] Update to latest HCC version.
Brian Bockelman
bbockelm at fedoraproject.org
Fri Mar 8 01:20:37 UTC 2013
commit b3fe293fa4f8a2ac40387f1a6b03bd19c9e6f4b6
Author: Brian Bockelman <bbockelm at cse.unl.edu>
Date: Mon Jul 16 11:12:05 2012 -0500
Update to latest HCC version.
cgahp_scaling.patch | 140 ++++
cgroup_reset_stats.patch | 181 +++++
cgroups_noswap.patch | 100 +++
condor-1605-v2.patch | 1457 ++++++++++++++++++++++++++++++++++++++++
condor-gahp.patch | 34 +
condor.spec | 151 ++++-
condor_config.generic.patch | 19 +-
condor_partial_defrag_v2.patch | 208 ++++++
condor_pid_namespaces_v7.patch | 305 +++++++++
hcc_config.patch | 15 +-
wso2-axis2.patch | 11 +
11 files changed, 2557 insertions(+), 64 deletions(-)
---
diff --git a/cgahp_scaling.patch b/cgahp_scaling.patch
new file mode 100644
index 0000000..6c2153e
--- /dev/null
+++ b/cgahp_scaling.patch
@@ -0,0 +1,140 @@
+diff --git a/src/condor_c-gahp/schedd_client.cpp b/src/condor_c-gahp/schedd_client.cpp
+index e3acc44..ba96490 100644
+--- a/src/condor_c-gahp/schedd_client.cpp
++++ b/src/condor_c-gahp/schedd_client.cpp
+@@ -173,7 +173,10 @@ doContactSchedd()
+ }
+ }
+
+-
++ int interaction_time = param_integer("CGAHP_SCHEDD_INTERACTION_TIME", 5);
++ time_t starttime = time(NULL);
++ bool rerun_immediately = false;
++
+ SchedDRequest::schedd_command_type commands [] = {
+ SchedDRequest::SDC_REMOVE_JOB,
+ SchedDRequest::SDC_HOLD_JOB,
+@@ -188,7 +191,11 @@ doContactSchedd()
+ int i=0;
+ while (i<3) {
+
+-
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ StringList id_list;
+ SimpleList <SchedDRequest*> this_batch;
+
+@@ -361,6 +368,12 @@ doContactSchedd()
+
+ SimpleList <SchedDRequest*> stage_in_batch;
+ do {
++
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ stage_in_batch.Clear();
+
+ command_queue.Rewind();
+@@ -507,6 +520,11 @@ doContactSchedd()
+ if (current_command->command != SchedDRequest::SDC_JOB_REFRESH_PROXY)
+ continue;
+
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ time_t expiration_time = GetDesiredDelegatedJobCredentialExpiration(current_command->classad);
+ time_t result_expiration_time = 0;
+
+@@ -591,7 +609,12 @@ doContactSchedd()
+
+ if (qmgr_connection == NULL)
+ goto update_report_result;
+-
++
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ error = FALSE;
+ errno = 0;
+ BeginTransaction();
+@@ -631,7 +654,8 @@ doContactSchedd()
+ if( SetAttribute(current_command->cluster_id,
+ current_command->proc_id,
+ lhstr,
+- rhstr) == -1 ) {
++ rhstr,
++ SetAttribute_NoAck) == -1 ) {
+ if ( errno == ETIMEDOUT ) {
+ failure_line_num = __LINE__;
+ failure_errno = errno;
+@@ -698,6 +722,11 @@ update_report_result:
+ if (current_command->command != SchedDRequest::SDC_UPDATE_LEASE)
+ continue;
+
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ std::string success_job_ids="";
+ if (qmgr_connection == NULL) {
+ sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
+@@ -803,6 +832,11 @@ update_report_result:
+ if (current_command->command != SchedDRequest::SDC_SUBMIT_JOB)
+ continue;
+
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ int ClusterId = -1;
+ int ProcId = -1;
+
+@@ -929,7 +963,8 @@ update_report_result:
+ error = TRUE;
+ } else if( SetAttribute (ClusterId, ProcId,
+ lhstr,
+- rhstr) == -1 ) {
++ rhstr,
++ SetAttribute_NoAck) == -1 ) {
+ if ( errno == ETIMEDOUT ) {
+ failure_line_num = __LINE__;
+ failure_errno = errno;
+@@ -993,6 +1028,11 @@ submit_report_result:
+ if (current_command->command != SchedDRequest::SDC_STATUS_CONSTRAINED)
+ continue;
+
++ if (time(NULL) - starttime > interaction_time) {
++ rerun_immediately = true;
++ break;
++ }
++
+ if (qmgr_connection != NULL) {
+ SimpleList <MyString *> matching_ads;
+
+@@ -1165,9 +1205,14 @@ submit_report_result:
+ }
+ }
+
++ dprintf (D_FULLDEBUG, "Schedd interaction took %ld seconds.\n", time(NULL)-starttime);
++ if (rerun_immediately) {
++ dprintf (D_FULLDEBUG, "Schedd interaction time hit limit; will retry immediately.\n");
++ }
++
+ // Come back soon..
+ // QUESTION: Should this always be a fixed time period?
+- daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval );
++ daemonCore->Reset_Timer( contactScheddTid, rerun_immediately ? 1 : contact_schedd_interval );
+ }
+
+
diff --git a/cgroup_reset_stats.patch b/cgroup_reset_stats.patch
new file mode 100644
index 0000000..458e7ad
--- /dev/null
+++ b/cgroup_reset_stats.patch
@@ -0,0 +1,181 @@
+diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp
+index d35ffcc..29d9471 100644
+--- a/src/condor_procd/proc_family.cpp
++++ b/src/condor_procd/proc_family.cpp
+@@ -54,7 +54,9 @@ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,
+ m_member_list(NULL)
+ #if defined(HAVE_EXT_LIBCGROUP)
+ , m_cgroup_string(""),
+- m_cm(CgroupManager::getInstance())
++ m_cm(CgroupManager::getInstance()),
++ m_initial_user_cpu(0),
++ m_initial_sys_cpu(0)
+ #endif
+ {
+ #if !defined(WIN32)
+@@ -188,6 +190,7 @@ after_migrate:
+ cgroup_free(&orig_cgroup);
+ }
+
++
+ after_restore:
+ if (orig_cgroup_string != NULL) {
+ free(orig_cgroup_string);
+@@ -231,6 +234,27 @@ ProcFamily::set_cgroup(const std::string &cgroup_string)
+ member = member->m_next;
+ }
+
++ // Record the amount of pre-existing CPU usage here.
++ m_initial_user_cpu = 0;
++ m_initial_sys_cpu = 0;
++ get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);
++
++ // Reset block IO controller
++ if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
++ struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
++ struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
++ ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
++ cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
++ int err;
++ if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
++ // Not allowed to reset stats?
++ dprintf(D_ALWAYS,
++ "Unable to reset cgroup %s block IO statistics. "
++ "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
++ m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
++ }
++ }
++
+ return 0;
+ }
+
+@@ -486,6 +510,40 @@ ProcFamily::aggregate_usage_cgroup_blockio(ProcFamilyUsage* usage)
+ return 0;
+ }
+
++int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) {
++
++ if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) {
++ return 1;
++ }
++
++ void * handle = NULL;
++ u_int64_t tmp = 0;
++ struct cgroup_stat stats;
++ int err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
++ while (err != ECGEOF) {
++ if (err > 0) {
++ dprintf(D_PROCFAMILY,
++ "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
++ m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
++ break;
++ }
++ if (_check_stat_uint64(stats, "user", &tmp)) {
++ user_time = tmp/clock_tick-m_initial_user_cpu;
++ } else if (_check_stat_uint64(stats, "system", &tmp)) {
++ sys_time = tmp/clock_tick-m_initial_sys_cpu;
++ }
++ err = cgroup_read_stats_next(&handle, &stats);
++ }
++ if (handle != NULL) {
++ cgroup_read_stats_end(&handle);
++ }
++ if (err != ECGEOF) {
++ dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(err));
++ return 1;
++ }
++ return 0;
++}
++
+ int
+ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
+ {
+@@ -496,16 +554,13 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
+
+ int err;
+ struct cgroup_stat stats;
+- void **handle;
++ void *handle = NULL;
+ u_int64_t tmp = 0, image = 0;
+ bool found_rss = false;
+
+ // Update memory
+- handle = (void **)malloc(sizeof(void*));
+- ASSERT (handle != NULL);
+- *handle = NULL;
+
+- err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
++ err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
+ while (err != ECGEOF) {
+ if (err > 0) {
+ dprintf(D_PROCFAMILY,
+@@ -522,10 +577,10 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
+ } else if (_check_stat_uint64(stats, "total_swap", &tmp)) {
+ image += tmp;
+ }
+- err = cgroup_read_stats_next(handle, &stats);
++ err = cgroup_read_stats_next(&handle, &stats);
+ }
+- if (*handle != NULL) {
+- cgroup_read_stats_end(handle);
++ if (handle != NULL) {
++ cgroup_read_stats_end(&handle);
+ }
+ if (found_rss) {
+ usage->total_image_size = image/1024;
+@@ -540,29 +595,12 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
+ m_max_image_size = image/1024;
+ }
+ // Try updating the max size using cgroups
+- update_max_image_size_cgroup();
++ // XXX: This is taken out for now - kernel calculates max INCLUDING
++ // the filesystem cache. Not what you want.
++ //update_max_image_size_cgroup();
+
+ // Update CPU
+- *handle = NULL;
+- err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
+- while (err != ECGEOF) {
+- if (err > 0) {
+- dprintf(D_PROCFAMILY,
+- "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
+- m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
+- break;
+- }
+- if (_check_stat_uint64(stats, "user", &tmp)) {
+- usage->user_cpu_time = tmp/clock_tick;
+- } else if (_check_stat_uint64(stats, "system", &tmp)) {
+- usage->sys_cpu_time = tmp/clock_tick;
+- }
+- err = cgroup_read_stats_next(handle, &stats);
+- }
+- if (*handle != NULL) {
+- cgroup_read_stats_end(handle);
+- }
+- free(handle);
++ get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);
+
+ aggregate_usage_cgroup_blockio(usage);
+
+--- a/src/condor_procd/proc_family.h
++++ b/src/condor_procd/proc_family.h
+@@ -181,6 +181,11 @@ private:
+ std::string m_cgroup_string;
+ CgroupManager &m_cm;
+ static long clock_tick;
++ // Sometimes Condor doesn't successfully clear out the cgroup from the
++ // previous run. Hence, we subtract off any CPU usage found at the
++ // start of the job.
++ long m_initial_user_cpu;
++ long m_initial_sys_cpu;
+ static bool have_warned_about_memsw;
+
+ int count_tasks_cgroup();
+@@ -190,6 +195,7 @@ private:
+ int spree_cgroup(int);
+ int migrate_to_cgroup(pid_t);
+ void update_max_image_size_cgroup();
++ int get_cpu_usage_cgroup(long &user_cpu, long &sys_cpu);
+ #endif
+ };
+
diff --git a/cgroups_noswap.patch b/cgroups_noswap.patch
new file mode 100644
index 0000000..b94e7df
--- /dev/null
+++ b/cgroups_noswap.patch
@@ -0,0 +1,100 @@
+diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp
+index d35ffcc..2a5839f 100644
+--- a/src/condor_procd/proc_family.cpp
++++ b/src/condor_procd/proc_family.cpp
+@@ -36,6 +36,10 @@
+
+ #include <unistd.h>
+ long ProcFamily::clock_tick = sysconf( _SC_CLK_TCK );
++
++// Swap accounting is sometimes turned off. We use this variable so we
++// warn about that situation only once.
++bool ProcFamily::have_warned_about_memsw = false;
+ #endif
+
+ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,
+@@ -425,10 +429,19 @@ ProcFamily::update_max_image_size_cgroup()
+ return;
+ }
+ if ((err = cgroup_get_value_uint64(memct, "memory.memsw.max_usage_in_bytes", &max_image))) {
+- dprintf(D_PROCFAMILY,
+- "Unable to load max memory usage for cgroup %s (ProcFamily %u): %u %s\n",
+- m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
+- return;
++ // On newer nodes, swap accounting is disabled by default.
++ // In some cases, swap accounting causes a kernel oops at the time of writing.
++ // So, we check memory.max_usage_in_bytes instead.
++ int err2 = cgroup_get_value_uint64(memct, "memory.max_usage_in_bytes", &max_image);
++ if (err2) {
++ dprintf(D_PROCFAMILY,
++ "Unable to load max memory usage for cgroup %s (ProcFamily %u): %u %s\n",
++ m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
++ return;
++ } else if (!have_warned_about_memsw) {
++ have_warned_about_memsw = true;
++ dprintf(D_ALWAYS, "Swap acounting is not available; only doing RAM accounting.\n");
++ }
+ }
+ m_max_image_size = max_image/1024;
+ }
+diff --git a/src/condor_procd/proc_family.h b/src/condor_procd/proc_family.h
+index 28a854c..d831d8e 100644
+--- a/src/condor_procd/proc_family.h
++++ b/src/condor_procd/proc_family.h
+@@ -181,6 +181,7 @@ private:
+ std::string m_cgroup_string;
+ CgroupManager &m_cm;
+ static long clock_tick;
++ static bool have_warned_about_memsw;
+
+ int count_tasks_cgroup();
+ int aggregate_usage_cgroup_blockio(ProcFamilyUsage*);
+diff --git a/src/condor_starter.V6.1/cgroup.linux.cpp b/src/condor_starter.V6.1/cgroup.linux.cpp
+index 97407b3..4fbd00d 100644
+--- a/src/condor_starter.V6.1/cgroup.linux.cpp
++++ b/src/condor_starter.V6.1/cgroup.linux.cpp
+@@ -185,13 +185,6 @@ int CgroupManager::create(const std::string &cgroup_string, Cgroup &cgroup,
+ has_cgroup, changed_cgroup)) {
+ return -1;
+ }
+- if ((preferred_controllers & CPUACCT_CONTROLLER) &&
+- initialize_controller(*cgroupp, CPUACCT_CONTROLLER,
+- CPUACCT_CONTROLLER_STR,
+- required_controllers & CPUACCT_CONTROLLER,
+- has_cgroup, changed_cgroup)) {
+- return -1;
+- }
+ if ((preferred_controllers & BLOCK_CONTROLLER) &&
+ initialize_controller(*cgroupp, BLOCK_CONTROLLER,
+ BLOCK_CONTROLLER_STR,
+diff --git a/src/condor_starter.V6.1/cgroup_limits.cpp b/src/condor_starter.V6.1/cgroup_limits.cpp
+index 71830a5..93e311c 100644
+--- a/src/condor_starter.V6.1/cgroup_limits.cpp
++++ b/src/condor_starter.V6.1/cgroup_limits.cpp
+@@ -20,7 +20,7 @@ CgroupLimits::CgroupLimits(std::string &cgroup) : m_cgroup_string(cgroup)
+ int CgroupLimits::set_memory_limit_bytes(uint64_t mem_bytes, bool soft)
+ {
+ if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::MEMORY_CONTROLLER)) {
+- dprintf(D_ALWAYS, "Unable to set memory limit because cgroup is invalid.");
++ dprintf(D_ALWAYS, "Unable to set memory limit because cgroup is invalid.\n");
+ return 1;
+ }
+
+@@ -55,7 +55,7 @@ int CgroupLimits::set_memory_limit_bytes(uint64_t mem_bytes, bool soft)
+ int CgroupLimits::set_cpu_shares(uint64_t shares)
+ {
+ if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::CPU_CONTROLLER)) {
+- dprintf(D_ALWAYS, "Unable to set CPU shares because cgroup is invalid.");
++ dprintf(D_ALWAYS, "Unable to set CPU shares because cgroup is invalid.\n");
+ return 1;
+ }
+
+@@ -89,7 +89,7 @@ int CgroupLimits::set_cpu_shares(uint64_t shares)
+ int CgroupLimits::set_blockio_weight(uint64_t weight)
+ {
+ if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::BLOCK_CONTROLLER)) {
+- dprintf(D_ALWAYS, "Unable to set blockio weight because cgroup is invalid.");
++ dprintf(D_ALWAYS, "Unable to set blockio weight because cgroup is invalid.\n");
+ return 1;
+ }
+
diff --git a/condor-1605-v2.patch b/condor-1605-v2.patch
new file mode 100644
index 0000000..1702b1d
--- /dev/null
+++ b/condor-1605-v2.patch
@@ -0,0 +1,1457 @@
+diff --git a/src/condor_c-gahp/schedd_client.cpp b/src/condor_c-gahp/schedd_client.cpp
+index 32f0059..b8fda84 100644
+--- a/src/condor_c-gahp/schedd_client.cpp
++++ b/src/condor_c-gahp/schedd_client.cpp
+@@ -562,7 +562,7 @@ doContactSchedd()
+ // Try connecting to the queue
+ Qmgr_connection * qmgr_connection;
+
+- if ((qmgr_connection = ConnectQ(dc_schedd.addr(), QMGMT_TIMEOUT, false, NULL, NULL, dc_schedd.version() )) == NULL) {
++ if ((qmgr_connection = ConnectQ(dc_schedd, QMGMT_TIMEOUT, false, NULL, NULL, dc_schedd.version() )) == NULL) {
+ error = TRUE;
+ sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
+ dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
+diff --git a/src/condor_contrib/triggerd/src/Triggerd.cpp b/src/condor_contrib/triggerd/src/Triggerd.cpp
+index 40e1197..59f2e55 100644
+--- a/src/condor_contrib/triggerd/src/Triggerd.cpp
++++ b/src/condor_contrib/triggerd/src/Triggerd.cpp
+@@ -819,7 +819,7 @@ Triggerd::PerformQueries()
+ // Perform the query and check the result
+ if (NULL != query_collector)
+ {
+- status = query->fetchAds(result, query_collector->addr(), &errstack);
++ status = query->fetchAds(result, *query_collector, &errstack);
+ }
+ else
+ {
+diff --git a/src/condor_credd/credd.cpp b/src/condor_credd/credd.cpp
+index 7f0973f..c3b7b1f 100644
+--- a/src/condor_credd/credd.cpp
++++ b/src/condor_credd/credd.cpp
+@@ -80,7 +80,7 @@ store_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) {
+
+ if (!socket->triedAuthentication()) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(socket, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(socket, WRITE, &errstack, NULL) ) {
+ dprintf (D_ALWAYS, "Unable to authenticate, qutting\n");
+ goto EXIT;
+ }
+@@ -236,7 +236,7 @@ get_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) {
+ // Authenticate
+ if (!socket->triedAuthentication()) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) {
++ if( ! SecMan::authenticate_sock(socket, READ, &errstack, NULL) ) {
+ dprintf (D_ALWAYS, "Unable to authenticate, qutting\n");
+ goto EXIT;
+ }
+@@ -351,7 +351,7 @@ query_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) {
+
+ if (!socket->triedAuthentication()) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) {
++ if( ! SecMan::authenticate_sock(socket, READ, &errstack, NULL) ) {
+ dprintf (D_ALWAYS, "Unable to authenticate, qutting\n");
+ goto EXIT;
+ }
+@@ -426,7 +426,7 @@ rm_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) {
+
+ if (!socket->triedAuthentication()) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) {
++ if( ! SecMan::authenticate_sock(socket, READ, &errstack, NULL) ) {
+ dprintf (D_ALWAYS, "Unable to authenticate, qutting\n");
+ goto EXIT;
+ }
+diff --git a/src/condor_daemon_client/daemon.cpp b/src/condor_daemon_client/daemon.cpp
+index e2afded..5ccb2aa 100644
+--- a/src/condor_daemon_client/daemon.cpp
++++ b/src/condor_daemon_client/daemon.cpp
+@@ -162,7 +162,7 @@ Daemon::Daemon( const ClassAd* tAd, daemon_t tType, const char* tPool )
+
+ getInfoFromAd( tAd );
+
+- dprintf( D_HOSTNAME, "New Daemon obj (%s) name: \"%s\", pool: "
++ dprintf( D_HOSTNAME, "From ClassAd, new Daemon obj (%s) name: \"%s\", pool: "
+ "\"%s\", addr: \"%s\"\n", daemonString(_type),
+ _name ? _name : "NULL", _pool ? _pool : "NULL",
+ _addr ? _addr : "NULL" );
+@@ -533,7 +533,7 @@ Daemon::connectSock(Sock *sock, int sec, CondorError* errstack, bool non_blockin
+
+
+ StartCommandResult
+-Daemon::startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description, char *, SecMan *sec_man, bool raw_protocol, char const *sec_session_id )
++Daemon::startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description, char *, SecMan *sec_man, bool raw_protocol, char const *sec_session_id, const char * hostname )
+ {
+ // This function may be either blocking or non-blocking, depending
+ // on the flag that is passed in. All versions of Daemon::startCommand()
+@@ -555,7 +555,7 @@ Daemon::startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, S
+ sock->timeout( timeout );
+ }
+
+- start_command_result = sec_man->startCommand(cmd, sock, raw_protocol, errstack, 0, callback_fn, misc_data, nonblocking, cmd_description, sec_session_id);
++ start_command_result = sec_man->startCommand(cmd, sock, raw_protocol, errstack, 0, callback_fn, misc_data, nonblocking, cmd_description, sec_session_id, hostname);
+
+ if(callback_fn) {
+ // SecMan::startCommand() called the callback function, so we just return here
+@@ -619,7 +619,8 @@ Daemon::startCommand( int cmd, Stream::stream_type st,Sock **sock,int timeout, C
+ _version,
+ &_sec_man,
+ raw_protocol,
+- sec_session_id);
++ sec_session_id,
++ _full_hostname);
+ }
+
+ Sock*
+@@ -662,7 +663,7 @@ Daemon::startCommand_nonblocking( int cmd, Sock* sock, int timeout, CondorError
+ {
+ // This is the nonblocking version of startCommand().
+ const bool nonblocking = true;
+- return startCommand(cmd,sock,timeout,errstack,callback_fn,misc_data,nonblocking,cmd_description,_version,&_sec_man,raw_protocol,sec_session_id);
++ return startCommand(cmd,sock,timeout,errstack,callback_fn,misc_data,nonblocking,cmd_description,_version,&_sec_man,raw_protocol,sec_session_id, _full_hostname);
+ }
+
+ bool
+@@ -670,7 +671,7 @@ Daemon::startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, c
+ {
+ // This is a blocking version of startCommand().
+ const bool nonblocking = false;
+- StartCommandResult rc = startCommand(cmd,sock,timeout,errstack,NULL,NULL,nonblocking,cmd_description,_version,&_sec_man,raw_protocol,sec_session_id);
++ StartCommandResult rc = startCommand(cmd,sock,timeout,errstack,NULL,NULL,nonblocking,cmd_description,_version,&_sec_man,raw_protocol,sec_session_id, _full_hostname);
+ switch(rc) {
+ case StartCommandSucceeded:
+ return true;
+@@ -2097,7 +2098,7 @@ Daemon::forceAuthentication( ReliSock* rsock, CondorError* errstack )
+ return true;
+ }
+
+- return SecMan::authenticate_sock(rsock, CLIENT_PERM, errstack );
++ return SecMan::authenticate_sock(rsock, CLIENT_PERM, errstack, _full_hostname);
+ }
+
+
+diff --git a/src/condor_daemon_client/daemon.h b/src/condor_daemon_client/daemon.h
+index 57fcd8a..9aa3b9f 100644
+--- a/src/condor_daemon_client/daemon.h
++++ b/src/condor_daemon_client/daemon.h
+@@ -761,7 +761,7 @@ protected:
+ It may be either blocking or nonblocking, depending on the
+ nonblocking flag. This version uses an existing socket.
+ */
+- static StartCommandResult startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description, char *version, SecMan *sec_man, bool raw_protocol, char const *sec_session_id );
++ static StartCommandResult startCommand( int cmd, Sock* sock, int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description, char *version, SecMan *sec_man, bool raw_protocol, char const *sec_session_id, char const *hostname );
+
+ /**
+ Internal function used by public versions of startCommand().
+@@ -769,7 +769,7 @@ protected:
+ nonblocking flag. This version creates a socket of the
+ specified type and connects it.
+ */
+- StartCommandResult startCommand( int cmd, Stream::stream_type st,Sock **sock,int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description=NULL, bool raw_protocol=false, char const *sec_session_id=NULL );
++ StartCommandResult startCommand( int cmd, Stream::stream_type st,Sock **sock,int timeout, CondorError *errstack, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking, char const *cmd_description, bool raw_protocol, char const *sec_session_id );
+
+ /**
+ Class used internally to handle non-blocking connects for
+diff --git a/src/condor_daemon_client/daemon_list.cpp b/src/condor_daemon_client/daemon_list.cpp
+index 244d58a..597d981 100644
+--- a/src/condor_daemon_client/daemon_list.cpp
++++ b/src/condor_daemon_client/daemon_list.cpp
+@@ -330,7 +330,7 @@ CollectorList::query(CondorQuery & cQuery, ClassAdList & adList, CondorError *er
+ }
+
+ result =
+- cQuery.fetchAds (adList, daemon->addr(), errstack);
++ cQuery.fetchAds (adList, *daemon, errstack);
+
+ if( num_collectors > 1 ) {
+ daemon->blacklistMonitorQueryFinished( result == Q_OK );
+diff --git a/src/condor_daemon_client/dc_schedd.cpp b/src/condor_daemon_client/dc_schedd.cpp
+index af1049e..769e12c 100644
+--- a/src/condor_daemon_client/dc_schedd.cpp
++++ b/src/condor_daemon_client/dc_schedd.cpp
+@@ -41,6 +41,9 @@ DCSchedd::DCSchedd( const char* the_name, const char* the_pool )
+ {
+ }
+
++DCSchedd::DCSchedd( const ClassAd* ad, const char* pool )
++ : Daemon( ad, DT_SCHEDD ,pool ) // Surprise! DT_SCHEDD is the second argument.
++{}
+
+ DCSchedd::~DCSchedd( void )
+ {
+diff --git a/src/condor_daemon_client/dc_schedd.h b/src/condor_daemon_client/dc_schedd.h
+index 201c9b4..6389e09 100644
+--- a/src/condor_daemon_client/dc_schedd.h
++++ b/src/condor_daemon_client/dc_schedd.h
+@@ -62,6 +62,13 @@ public:
+ */
+ DCSchedd( const char* const name = NULL, const char* pool = NULL );
+
++ /** Constructor. Same as a Daemon object.
++ @param ad The classad of the schedd object; saves a query
++ to the collector.
++ @param pool The name of the pool, or NULL to use the local pool
++ */
++ DCSchedd( const ClassAd* ad, const char* pool = NULL );
++
+ /// Destructor
+ ~DCSchedd();
+
+diff --git a/src/condor_daemon_client/dc_startd.cpp b/src/condor_daemon_client/dc_startd.cpp
+index ec3ab14..9a4b44c 100644
+--- a/src/condor_daemon_client/dc_startd.cpp
++++ b/src/condor_daemon_client/dc_startd.cpp
+@@ -914,7 +914,6 @@ DCStartd::getAds( ClassAdList &adsList )
+ // fetch the query
+ QueryResult q;
+ CondorQuery* query;
+- char* ad_addr;
+
+ // instantiate query object
+ if (!(query = new CondorQuery (STARTD_AD))) {
+@@ -923,8 +922,7 @@ DCStartd::getAds( ClassAdList &adsList )
+ }
+
+ if( this->locate() ){
+- ad_addr = this->addr();
+- q = query->fetchAds(adsList, ad_addr, &errstack);
++ q = query->fetchAds(adsList, *this, &errstack);
+ if (q != Q_OK) {
+ if (q == Q_COMMUNICATION_ERROR) {
+ dprintf( D_ALWAYS, "%s\n", errstack.getFullText(true) );
+diff --git a/src/condor_daemon_core.V6/daemon_command.cpp b/src/condor_daemon_core.V6/daemon_command.cpp
+index e6da114..3a96315 100644
+--- a/src/condor_daemon_core.V6/daemon_command.cpp
++++ b/src/condor_daemon_core.V6/daemon_command.cpp
+@@ -991,7 +991,7 @@ DaemonCommandProtocol::CommandProtocolResult DaemonCommandProtocol::Authenticate
+ int auth_timeout = daemonCore->getSecMan()->getSecTimeout( m_comTable[cmd_index].perm );
+
+ char *method_used = NULL;
+- bool auth_success = m_sock->authenticate(m_key, auth_methods, &errstack, auth_timeout, &method_used);
++ bool auth_success = m_sock->authenticate(m_key, auth_methods, &errstack, auth_timeout, &method_used, NULL);
+
+ if ( method_used ) {
+ m_policy->Assign(ATTR_SEC_AUTHENTICATION_METHODS, method_used);
+@@ -1279,7 +1279,7 @@ DaemonCommandProtocol::CommandProtocolResult DaemonCommandProtocol::ExecCommand(
+ m_comTable[cmd_index].force_authentication &&
+ !m_sock->triedAuthentication() )
+ {
+- SecMan::authenticate_sock(m_sock, WRITE, &errstack);
++ SecMan::authenticate_sock(m_sock, WRITE, &errstack, NULL);
+ // we don't check the return value, because the code below
+ // handles what to do with unauthenticated connections
+ }
+diff --git a/src/condor_eventd.V2/admin_event.cpp b/src/condor_eventd.V2/admin_event.cpp
+index 3fa1c46..83abc49 100644
+--- a/src/condor_eventd.V2/admin_event.cpp
++++ b/src/condor_eventd.V2/admin_event.cpp
+@@ -1089,7 +1089,7 @@ AdminEvent::FetchAds_ByConstraint( const char *constraint )
+
+ query->addORConstraint( constraint );
+
+- q = query->fetchAds( m_collector_query_ads, pool->addr(), &errstack);
++ q = query->fetchAds( m_collector_query_ads, pool, &errstack);
+
+ if( q != Q_OK ){
+ dprintf(D_ALWAYS, "Trouble fetching Ads with<<%s>><<%d>>\n",
+diff --git a/src/condor_gridmanager/gridmanager.cpp b/src/condor_gridmanager/gridmanager.cpp
+index 1f97ef7..03db810 100644
+--- a/src/condor_gridmanager/gridmanager.cpp
++++ b/src/condor_gridmanager/gridmanager.cpp
+@@ -644,7 +644,7 @@ doContactSchedd()
+ }
+
+
+- schedd = ConnectQ( ScheddAddr, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() );
++ schedd = ConnectQ( *ScheddObj, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() );
+ if ( !schedd ) {
+ error_str = "Failed to connect to schedd!";
+ goto contact_schedd_failure;
+diff --git a/src/condor_includes/authentication.h b/src/condor_includes/authentication.h
+index d2b976a..6167e84 100644
+--- a/src/condor_includes/authentication.h
++++ b/src/condor_includes/authentication.h
+@@ -40,7 +40,7 @@ class Authentication {
+
+ ~Authentication();
+
+- int authenticate( char *hostAddr, const char* auth_methods, CondorError* errstack, int timeout);
++ int authenticate( const char *hostAddr, const char* auth_methods, CondorError* errstack, int timeout);
+ //------------------------------------------
+ // PURPOSE: authenticate with the other side
+ // REQUIRE: hostAddr -- host to authenticate
+@@ -50,7 +50,7 @@ class Authentication {
+ // RETURNS: -1 -- failure
+ //------------------------------------------
+
+- int authenticate( char *hostAddr, KeyInfo *& key, const char* auth_methods, CondorError* errstack, int timeout);
++ int authenticate( const char *hostAddr, KeyInfo *& key, const char* auth_methods, CondorError* errstack, int timeout);
+ //------------------------------------------
+ // PURPOSE: To send the secret key over. this method
+ // is written to keep compatibility issues
+@@ -161,7 +161,7 @@ class Authentication {
+
+ #endif /* !SKIP_AUTHENTICATION */
+
+- int authenticate_inner( char *hostAddr, const char* auth_methods, CondorError* errstack, int timeout);
++ int authenticate_inner( const char *hostAddr, const char* auth_methods, CondorError* errstack, int timeout);
+
+ //------------------------------------------
+ // Data (private)
+diff --git a/src/condor_includes/condor_auth_x509.h b/src/condor_includes/condor_auth_x509.h
+index bdbc545..dac8aa5 100644
+--- a/src/condor_includes/condor_auth_x509.h
++++ b/src/condor_includes/condor_auth_x509.h
+@@ -103,7 +103,7 @@ class Condor_Auth_X509 : public Condor_Auth_Base {
+
+ int authenticate_self_gss(CondorError* errstack);
+
+- int authenticate_client_gss(CondorError* errstack);
++ int authenticate_client_gss(const char *remoteHost, CondorError* errstack);
+
+ int authenticate_server_gss(CondorError* errstack);
+
+diff --git a/src/condor_includes/condor_qmgr.h b/src/condor_includes/condor_qmgr.h
+index 5e5012e..642b602 100644
+--- a/src/condor_includes/condor_qmgr.h
++++ b/src/condor_includes/condor_qmgr.h
+@@ -25,7 +25,7 @@
+ #include "proc.h"
+ #include "../condor_utils/CondorError.h"
+ #include "condor_classad.h"
+-
++#include "daemon.h"
+
+ typedef struct {
+ bool dummy;
+@@ -54,8 +54,7 @@ int InitializeConnection(const char *, const char *);
+ int InitializeReadOnlyConnection(const char * );
+
+ /** Initiate connection to schedd job queue and begin transaction.
+- @param qmgr_location can be the name or sinful string of a schedd or
+- NULL to connect to the local schedd
++ @param daemon a daemon object of type DT_SCHEDD
+ @param timeout specifies the maximum time (in seconds) to wait for TCP
+ connection establishment
+ @param read_only can be set to true to skip the potentially slow
+@@ -64,7 +63,7 @@ int InitializeReadOnlyConnection(const char * );
+ @param schedd_version_str Version of schedd if known (o.w. NULL).
+ @return opaque Qmgr_connection structure
+ */
+-Qmgr_connection *ConnectQ(const char *qmgr_location, int timeout=0,
++Qmgr_connection *ConnectQ(Daemon &daemon, int timeout=0,
+ bool read_only=false, CondorError* errstack=NULL,
+ const char *effective_owner=NULL,
+ char const *schedd_version_str=NULL);
+diff --git a/src/condor_includes/condor_secman.h b/src/condor_includes/condor_secman.h
+index b59519e..9891497 100644
+--- a/src/condor_includes/condor_secman.h
++++ b/src/condor_includes/condor_secman.h
+@@ -106,12 +106,12 @@ public:
+ // spawn off a non-blocking attempt to create a security
+ // session so that in the future, a UDP command could succeed
+ // without StartCommandWouldBlock.
+- StartCommandResult startCommand( int cmd, Sock* sock, bool raw_protocol, CondorError* errstack, int subcmd, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking,char const *cmd_description,char const *sec_session_id);
++ StartCommandResult startCommand( int cmd, Sock* sock, bool raw_protocol, CondorError* errstack, int subcmd, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking,char const *cmd_description,char const *sec_session_id, const char *hostname);
+
+ // Authenticate a socket using whatever authentication methods
+ // have been configured for the specified perm level.
+- static int authenticate_sock(Sock *s,DCpermission perm, CondorError* errstack);
+- static int authenticate_sock(Sock *s,KeyInfo *&ki, DCpermission perm, CondorError* errstack);
++ static int authenticate_sock(Sock *s,DCpermission perm, CondorError* errstack, const char * hostname);
++ static int authenticate_sock(Sock *s,KeyInfo *&ki, DCpermission perm, CondorError* errstack, const char * hostname);
+
+
+ //------------------------------------------
+diff --git a/src/condor_includes/reli_sock.h b/src/condor_includes/reli_sock.h
+index b48ce8d..f5cdca8 100644
+--- a/src/condor_includes/reli_sock.h
++++ b/src/condor_includes/reli_sock.h
+@@ -218,9 +218,9 @@ public:
+ virtual int peek(char &);
+
+ ///
+- int authenticate( const char* methods, CondorError* errstack, int auth_timeout );
++ int authenticate( const char* methods, CondorError* errstack, int auth_timeout, const char * hostname );
+ ///
+- int authenticate( KeyInfo *& key, const char* methods, CondorError* errstack, int auth_timeout, char **method_used=NULL );
++ int authenticate( KeyInfo *& key, const char* methods, CondorError* errstack, int auth_timeout, char **method_used, const char * hostname );
+ ///
+ int isClient() { return is_client; };
+
+@@ -254,7 +254,8 @@ protected:
+ int prepare_for_nobuffering( stream_coding = stream_unknown);
+ int perform_authenticate( bool with_key, KeyInfo *& key,
+ const char* methods, CondorError* errstack,
+- int auth_timeout, char **method_used );
++ int auth_timeout, char **method_used,
++ const char* hostname );
+
+ // This is used internally to recover sanity on the stream after
+ // failing to open a file in put_file().
+diff --git a/src/condor_includes/sock.h b/src/condor_includes/sock.h
+index ca1b1c8..53f8b23 100644
+--- a/src/condor_includes/sock.h
++++ b/src/condor_includes/sock.h
+@@ -347,10 +347,10 @@ public:
+ bool isAuthenticated() const;
+
+ ///
+- virtual int authenticate(const char * auth_methods, CondorError* errstack, int timeout);
++ virtual int authenticate(const char * auth_methods, CondorError* errstack, int timeout, const char *hostname);
+ ///
+ // method_used should be freed by the caller when finished with it
+- virtual int authenticate(KeyInfo *&ki, const char * auth_methods, CondorError* errstack, int timeout, char **method_used=NULL);
++ virtual int authenticate(KeyInfo *&ki, const char * auth_methods, CondorError* errstack, int timeout, char **method_used, const char *hostname);
+
+ /// if we are connecting, merges together Stream::get_deadline
+ /// and connect_timeout_time()
+diff --git a/src/condor_io/authentication.cpp b/src/condor_io/authentication.cpp
+index 4a11db0..0374857 100644
+--- a/src/condor_io/authentication.cpp
++++ b/src/condor_io/authentication.cpp
+@@ -85,7 +85,7 @@ Authentication::~Authentication()
+ #endif
+ }
+
+-int Authentication::authenticate( char *hostAddr, KeyInfo *& key,
++int Authentication::authenticate( const char *hostAddr, KeyInfo *& key,
+ const char* auth_methods, CondorError* errstack, int timeout)
+ {
+ int retval = authenticate(hostAddr, auth_methods, errstack, timeout);
+@@ -106,7 +106,7 @@ int Authentication::authenticate( char *hostAddr, KeyInfo *& key,
+ return retval;
+ }
+
+-int Authentication::authenticate( char *hostAddr, const char* auth_methods,
++int Authentication::authenticate( const char *hostAddr, const char* auth_methods,
+ CondorError* errstack, int timeout)
+ {
+ int retval;
+@@ -124,7 +124,7 @@ int Authentication::authenticate( char *hostAddr, const char* auth_methods,
+ return retval;
+ }
+
+-int Authentication::authenticate_inner( char *hostAddr, const char* auth_methods,
++int Authentication::authenticate_inner( const char *hostAddr, const char* auth_methods,
+ CondorError* errstack, int timeout)
+ {
+ #if defined(SKIP_AUTHENTICATION)
+diff --git a/src/condor_io/condor_auth_x509.cpp b/src/condor_io/condor_auth_x509.cpp
+index ee80b9d..7c81cea 100644
+--- a/src/condor_io/condor_auth_x509.cpp
++++ b/src/condor_io/condor_auth_x509.cpp
+@@ -92,7 +92,7 @@ Condor_Auth_X509 :: ~Condor_Auth_X509()
+ }
+ }
+
+-int Condor_Auth_X509 :: authenticate(const char * /* remoteHost */, CondorError* errstack)
++int Condor_Auth_X509 :: authenticate(const char * remoteHost, CondorError* errstack)
+ {
+ int status = 1;
+ int reply = 0;
+@@ -171,7 +171,7 @@ int Condor_Auth_X509 :: authenticate(const char * /* remoteHost */, CondorError*
+
+ switch ( mySock_->isClient() ) {
+ case 1:
+- status = authenticate_client_gss(errstack);
++ status = authenticate_client_gss(remoteHost, errstack);
+ break;
+ default:
+ status = authenticate_server_gss(errstack);
+@@ -655,7 +655,7 @@ int Condor_Auth_X509::authenticate_self_gss(CondorError* errstack)
+ return TRUE;
+ }
+
+-int Condor_Auth_X509::authenticate_client_gss(CondorError* errstack)
++int Condor_Auth_X509::authenticate_client_gss(const char * remoteHost, CondorError* errstack)
+ {
+ OM_uint32 major_status = 0;
+ OM_uint32 minor_status = 0;
+@@ -775,31 +775,48 @@ int Condor_Auth_X509::authenticate_client_gss(CondorError* errstack)
+ }
+ }
+
+- std::string fqh = get_full_hostname(mySock_->peer_addr());
+- StringList * daemonNames = getDaemonList("GSI_DAEMON_NAME",fqh.c_str());
+-
+- // Now, let's see if the name is in the list, I am not using
+- // anycase here, so if the host name and what we are looking for
+- // are in different cases, then we will run into problems.
+- if( daemonNames ) {
+- status = daemonNames->contains_withwildcard(server) == TRUE? 1 : 0;
+-
+- if( !status ) {
+- errstack->pushf("GSI", GSI_ERR_UNAUTHORIZED_SERVER,
+- "Failed to authenticate because the subject '%s' is not currently trusted by you. "
+- "If it should be, add it to GSI_DAEMON_NAME or undefine GSI_DAEMON_NAME.", server);
+- dprintf(D_SECURITY,
+- "GSI_DAEMON_NAME is defined and the server %s is not specified in the GSI_DAEMON_NAME parameter\n",
+- server);
+- }
++ std::vector<MyString> fqhs;
++ if (remoteHost && remoteHost[0]) // callers such as SecManStartCommand pass "" rather than NULL when no hostname is known; do not check an empty alias
++ {
++ std::vector<MyString> fqhs_copy = get_hostname_with_alias(mySock_->peer_addr());
++ fqhs.push_back(remoteHost);
++ fqhs.insert(fqhs.end(), fqhs_copy.begin(), fqhs_copy.end());
+ }
+- else {
+- status = CheckServerName(fqh.c_str(),mySock_->peer_ip_str(),mySock_,errstack);
++ else
++ {
++ fqhs = get_hostname_with_alias(mySock_->peer_addr());
+ }
++ dprintf(D_FULLDEBUG, "Number of aliases: %zu\n", fqhs.size());
++ for(std::vector<MyString>::const_iterator it = fqhs.begin(); it != fqhs.end(); ++it) {
++ dprintf(D_FULLDEBUG, "Checking validity of alias %s\n", it->Value());
++ std::string fqh = it->Value();
++ StringList * daemonNames = getDaemonList("GSI_DAEMON_NAME",fqh.c_str());
++
++ // Now, let's see if the name is in the list, I am not using
++ // anycase here, so if the host name and what we are looking for
++ // are in different cases, then we will run into problems.
++ if( daemonNames ) {
++ status = daemonNames->contains_withwildcard(server) == TRUE? 1 : 0;
++
++ if( !status ) {
++ errstack->pushf("GSI", GSI_ERR_UNAUTHORIZED_SERVER,
++ "Failed to authenticate because the subject '%s' is not currently trusted by you. "
++ "If it should be, add it to GSI_DAEMON_NAME or undefine GSI_DAEMON_NAME.", server);
++ dprintf(D_SECURITY,
++ "GSI_DAEMON_NAME is defined and the server %s is not specified in the GSI_DAEMON_NAME parameter\n",
++ server);
++ }
++ }
++ else {
++ status = CheckServerName(fqh.c_str(),mySock_->peer_ip_str(),mySock_,errstack);
++ }
++ delete daemonNames;
+
+- if (status) {
+- dprintf(D_SECURITY, "valid GSS connection established to %s\n", server);
+- }
++ if (status) {
++ dprintf(D_SECURITY, "valid GSS connection established to %s\n", server);
++ break;
++ }
++ }
+
+ mySock_->encode();
+ if (!mySock_->code(status) || !mySock_->end_of_message()) {
+@@ -810,7 +827,6 @@ int Condor_Auth_X509::authenticate_client_gss(CondorError* errstack)
+ }
+
+ delete [] server;
+- delete daemonNames;
+ }
+ clear:
+ return (status == 0) ? FALSE : TRUE;
+diff --git a/src/condor_io/condor_secman.cpp b/src/condor_io/condor_secman.cpp
+index 21607fe..ea768bf 100644
+--- a/src/condor_io/condor_secman.cpp
++++ b/src/condor_io/condor_secman.cpp
+@@ -855,7 +855,7 @@ class SecManStartCommand: Service, public ClassyCountedPtr {
+ SecManStartCommand (
+ int cmd,Sock *sock,bool raw_protocol,
+ CondorError *errstack,int subcmd,StartCommandCallbackType *callback_fn,
+- void *misc_data,bool nonblocking,char const *cmd_description,char const *sec_session_id_hint,SecMan *sec_man):
++ void *misc_data,bool nonblocking,char const *cmd_description,char const *sec_session_id_hint,SecMan *sec_man, const std::string &hostname):
+
+ m_cmd(cmd),
+ m_subcmd(subcmd),
+@@ -867,7 +867,8 @@ class SecManStartCommand: Service, public ClassyCountedPtr {
+ m_nonblocking(nonblocking),
+ m_pending_socket_registered(false),
+ m_sec_man(*sec_man),
+- m_use_tmp_sec_session(false)
++ m_use_tmp_sec_session(false),
++ m_hostname(hostname)
+ {
+ m_sec_session_id_hint = sec_session_id_hint ? sec_session_id_hint : "";
+ if( m_sec_session_id_hint == USE_TMP_SEC_SESSION ) {
+@@ -972,6 +973,7 @@ class SecManStartCommand: Service, public ClassyCountedPtr {
+ KeyCacheEntry *m_enc_key;
+ KeyInfo* m_private_key;
+ MyString m_sec_session_id_hint;
++ std::string m_hostname;
+
+ enum StartCommandState {
+ SendAuthInfo,
+@@ -1023,7 +1025,7 @@ class SecManStartCommand: Service, public ClassyCountedPtr {
+ };
+
+ StartCommandResult
+-SecMan::startCommand( int cmd, Sock* sock, bool raw_protocol, CondorError* errstack, int subcmd, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking,char const *cmd_description,char const *sec_session_id_hint)
++SecMan::startCommand( int cmd, Sock* sock, bool raw_protocol, CondorError* errstack, int subcmd, StartCommandCallbackType *callback_fn, void *misc_data, bool nonblocking,char const *cmd_description,char const *sec_session_id_hint, const char *hostname)
+ {
+ // This function is simply a convenient wrapper around the
+ // SecManStartCommand class, which does the actual work.
+@@ -1032,7 +1034,8 @@ SecMan::startCommand( int cmd, Sock* sock, bool raw_protocol, CondorError* errst
+ // The blocking case could avoid use of the heap, but for simplicity,
+ // we just do the same in both cases.
+
+- classy_counted_ptr<SecManStartCommand> sc = new SecManStartCommand(cmd,sock,raw_protocol,errstack,subcmd,callback_fn,misc_data,nonblocking,cmd_description,sec_session_id_hint,this);
++ std::string hostname_str = hostname ? hostname : "";
++ classy_counted_ptr<SecManStartCommand> sc = new SecManStartCommand(cmd,sock,raw_protocol,errstack,subcmd,callback_fn,misc_data,nonblocking,cmd_description,sec_session_id_hint,this, hostname_str);
+
+ ASSERT(sc.get());
+
+@@ -1829,7 +1832,7 @@ SecManStartCommand::authenticate_inner()
+ }
+
+ int auth_timeout = m_sec_man.getSecTimeout( CLIENT_PERM );
+- bool auth_success = m_sock->authenticate(m_private_key, auth_methods, m_errstack,auth_timeout);
++ bool auth_success = m_sock->authenticate(m_private_key, auth_methods, m_errstack,auth_timeout, NULL, m_hostname.c_str());
+
+ if (auth_methods) {
+ free(auth_methods);
+@@ -2159,7 +2162,8 @@ SecManStartCommand::DoTCPAuth_inner()
+ m_nonblocking,
+ m_cmd_description.Value(),
+ m_sec_session_id_hint.Value(),
+- &m_sec_man);
++ &m_sec_man,
++ m_hostname);
+
+ StartCommandResult auth_result = m_tcp_auth_command->startCommand();
+
+@@ -2796,23 +2800,23 @@ char* SecMan::my_parent_unique_id() {
+ }
+
+ int
+-SecMan::authenticate_sock(Sock *s,DCpermission perm, CondorError* errstack)
++SecMan::authenticate_sock(Sock *s,DCpermission perm, CondorError* errstack, const char * hostname)
+ {
+ MyString methods;
+ getAuthenticationMethods( perm, &methods );
+ ASSERT(s);
+ int auth_timeout = getSecTimeout(perm);
+- return s->authenticate(methods.Value(),errstack,auth_timeout);
++ return s->authenticate(methods.Value(),errstack,auth_timeout, hostname);
+ }
+
+ int
+-SecMan::authenticate_sock(Sock *s,KeyInfo *&ki, DCpermission perm, CondorError* errstack)
++SecMan::authenticate_sock(Sock *s,KeyInfo *&ki, DCpermission perm, CondorError* errstack, const char * hostname)
+ {
+ MyString methods;
+ getAuthenticationMethods( perm, &methods );
+ ASSERT(s);
+ int auth_timeout = getSecTimeout(perm);
+- return s->authenticate(ki,methods.Value(),errstack,auth_timeout);
++ return s->authenticate(ki,methods.Value(),errstack,auth_timeout, NULL, hostname);
+ }
+
+ int
+diff --git a/src/condor_io/reli_sock.cpp b/src/condor_io/reli_sock.cpp
+index d80bab4..00a6d10 100644
+--- a/src/condor_io/reli_sock.cpp
++++ b/src/condor_io/reli_sock.cpp
+@@ -967,11 +967,11 @@ ReliSock::prepare_for_nobuffering(stream_coding direction)
+
+ int ReliSock::perform_authenticate(bool with_key, KeyInfo *& key,
+ const char* methods, CondorError* errstack,
+- int auth_timeout, char **method_used)
++ int auth_timeout, char **method_used,
++ const char * hostname )
+ {
+ int in_encode_mode;
+ int result;
+-
+ if( method_used ) {
+ *method_used = NULL;
+ }
+@@ -984,9 +984,9 @@ int ReliSock::perform_authenticate(bool with_key, KeyInfo *& key,
+
+ // actually perform the authentication
+ if ( with_key ) {
+- result = authob.authenticate( hostAddr, key, methods, errstack, auth_timeout );
++ result = authob.authenticate( hostname, key, methods, errstack, auth_timeout );
+ } else {
+- result = authob.authenticate( hostAddr, methods, errstack, auth_timeout );
++ result = authob.authenticate( hostname, methods, errstack, auth_timeout );
+ }
+ // restore stream mode (either encode or decode)
+ if ( in_encode_mode && is_decode() ) {
+@@ -1010,16 +1010,16 @@ int ReliSock::perform_authenticate(bool with_key, KeyInfo *& key,
+ }
+ }
+
+-int ReliSock::authenticate(KeyInfo *& key, const char* methods, CondorError* errstack, int auth_timeout, char **method_used)
++int ReliSock::authenticate(KeyInfo *& key, const char* methods, CondorError* errstack, int auth_timeout, char **method_used, const char *hostname)
+ {
+- return perform_authenticate(true,key,methods,errstack,auth_timeout,method_used);
++ return perform_authenticate(true,key,methods,errstack,auth_timeout,method_used, hostname);
+ }
+
+ int
+-ReliSock::authenticate(const char* methods, CondorError* errstack,int auth_timeout )
++ReliSock::authenticate(const char* methods, CondorError* errstack,int auth_timeout, const char * hostname)
+ {
+ KeyInfo *key = NULL;
+- return perform_authenticate(false,key,methods,errstack,auth_timeout,NULL);
++ return perform_authenticate(false,key,methods,errstack,auth_timeout,NULL, hostname);
+ }
+
+ bool
+diff --git a/src/condor_io/sock.cpp b/src/condor_io/sock.cpp
+index c4dcb0b..e743139 100644
+--- a/src/condor_io/sock.cpp
++++ b/src/condor_io/sock.cpp
+@@ -2235,12 +2235,12 @@ bool Sock :: is_hdr_encrypt(){
+ return FALSE;
+ }
+
+-int Sock :: authenticate(KeyInfo *&, const char * /* methods */, CondorError* /* errstack */, int /*timeout*/, char ** /*method_used*/)
++int Sock :: authenticate(KeyInfo *&, const char * /* methods */, CondorError* /* errstack */, int /*timeout*/, char ** /*method_used*/, const char * /*hostname*/)
+ {
+ return -1;
+ }
+
+-int Sock :: authenticate(const char * /* methods */, CondorError* /* errstack */, int /*timeout*/)
++int Sock :: authenticate(const char * /* methods */, CondorError* /* errstack */, int /*timeout*/, const char * /*hostname*/)
+ {
+ /*
+ errstack->push("AUTHENTICATE", AUTHENTICATE_ERR_NOT_BUILT,
+diff --git a/src/condor_job_router/submit_job.cpp b/src/condor_job_router/submit_job.cpp
+index ccad171..393bafc 100644
+--- a/src/condor_job_router/submit_job.cpp
++++ b/src/condor_job_router/submit_job.cpp
+@@ -175,7 +175,7 @@ ClaimJobResult claim_job(int cluster, int proc, MyString * error_details, const
+ static Qmgr_connection *open_q_as_owner(char const *effective_owner,DCSchedd &schedd,FailObj &failobj)
+ {
+ CondorError errstack;
+- Qmgr_connection * qmgr = ConnectQ(schedd.addr(), 0 /*timeout==default*/, false /*read-only*/, & errstack, effective_owner, schedd.version());
++ Qmgr_connection * qmgr = ConnectQ(schedd, 0 /*timeout==default*/, false /*read-only*/, & errstack, effective_owner, schedd.version());
+ if( ! qmgr ) {
+ failobj.fail("Unable to connect\n%s\n", errstack.getFullText(true));
+ return NULL;
+diff --git a/src/condor_prio/prio.cpp b/src/condor_prio/prio.cpp
+index deec9b1..c27aa94 100644
+--- a/src/condor_prio/prio.cpp
++++ b/src/condor_prio/prio.cpp
+@@ -157,8 +157,7 @@ main( int argc, char *argv[] )
+ }
+
+ // Open job queue
+- DaemonName = schedd.addr();
+- q = ConnectQ(DaemonName.Value());
++ q = ConnectQ(schedd);
+ if( !q ) {
+ fprintf( stderr, "Failed to connect to queue manager %s\n",
+ DaemonName.Value() );
+diff --git a/src/condor_q.V6/queue.cpp b/src/condor_q.V6/queue.cpp
+index 58ff6ed..85ad9cf 100644
+--- a/src/condor_q.V6/queue.cpp
++++ b/src/condor_q.V6/queue.cpp
+@@ -119,13 +119,13 @@ static char * bufferJobShort (ClassAd *);
+ /* if useDB is false, then v1 =scheddAddress, v2=scheddName, v3=scheddMachine, v4=scheddVersion;
+ if useDB is true, then v1 =quill_name, v2=db_ipAddr, v3=db_name, v4=db_password
+ */
+-static bool show_queue (const char* v1, const char* v2, const char* v3, const char* v4, bool useDB);
+-static bool show_queue_buffered (const char* v1, const char* v2, const char* v3, const char* v4, bool useDB);
++static bool show_queue (const char* v1, const char* v2, const char* v3, const char* v4, const ClassAd *ad, bool useDB);
++static bool show_queue_buffered (const char* v1, const char* v2, const char* v3, const char* v4, const ClassAd *ad, bool useDB);
+ static void init_output_mask();
+
+
+ /* a type used to point to one of the above two functions */
+-typedef bool (*show_queue_fp)(const char* v1, const char* v2, const char* v3, const char* v4, bool useDB);
++typedef bool (*show_queue_fp)(const char* v1, const char* v2, const char* v3, const char* v4, const ClassAd *ad, bool useDB);
+
+ static bool read_classad_file(const char *filename, ClassAdList &classads);
+
+@@ -506,7 +506,7 @@ int main (int argc, char **argv)
+
+ /* ask the database for the queue */
+
+- if ( (retval = sqfp( NULL, NULL, NULL, NULL, TRUE) ) ) {
++ if ( (retval = sqfp( NULL, NULL, NULL, NULL, NULL, TRUE) ) ) {
+ /* if the queue was retrieved, then I am done */
+ freeConnectionStrings();
+ exit(retval?EXIT_SUCCESS:EXIT_FAILURE);
+@@ -557,7 +557,7 @@ int main (int argc, char **argv)
+ (quill.name()):tmp_char,
+ (quill.fullHostname())?
+ (quill.fullHostname()):tmp_char,
+- NULL, FALSE) ) ) )
++ NULL, NULL, FALSE) ) ) )
+ {
+ /* if the queue was retrieved, then I am done */
+ freeConnectionStrings();
+@@ -600,7 +600,7 @@ int main (int argc, char **argv)
+ #endif /* HAVE_EXT_POSTGRESQL */
+ case DIRECT_SCHEDD:
+ retval = sqfp(scheddAddr, scheddName, scheddMachine,
+- scheddVersion.Value(), FALSE);
++ scheddVersion.Value(), NULL, FALSE);
+
+ /* Hopefully I got the queue from the schedd... */
+ freeConnectionStrings();
+@@ -793,7 +793,7 @@ int main (int argc, char **argv)
+ case DIRECT_RDBMS:
+ if (useDB) {
+ if ( (retval = sqfp(quillName, dbIpAddr, dbName,
+- queryPassword, TRUE) ) )
++ queryPassword, ad, TRUE) ) )
+ {
+ /* processed correctly, so do the next ad */
+ continue;
+@@ -840,7 +840,7 @@ int main (int argc, char **argv)
+
+ if((result2 == Q_OK) && quillAddr &&
+ (retval = sqfp(quillAddr, quillName, quillMachine,
+- NULL, FALSE) ) )
++ NULL, ad, FALSE) ) )
+ {
+ /* processed correctly, so do the next ad */
+ continue;
+@@ -896,7 +896,7 @@ int main (int argc, char **argv)
+ case DIRECT_SCHEDD:
+ /* database not configured or could not be reached,
+ query the schedd daemon directly */
+- retval = sqfp(scheddAddr, scheddName, scheddMachine, scheddVersion.Value(), FALSE);
++ retval = sqfp(scheddAddr, scheddName, scheddMachine, scheddVersion.Value(), ad, FALSE);
+
+ break;
+
+@@ -2536,7 +2536,7 @@ static void init_output_mask()
+ */
+
+ static bool
+-show_queue_buffered( const char* v1, const char* v2, const char* v3, const char* v4, bool useDB )
++show_queue_buffered( const char* v1, const char* v2, const char* v3, const char* v4, const ClassAd* ad, bool useDB )
+ {
+ const char *scheddAddress = 0;
+ const char *scheddName = 0;
+@@ -2709,7 +2709,13 @@ show_queue_buffered( const char* v1, const char* v2, const char* v3, const char*
+ #endif /* HAVE_EXT_POSTGRESQL */
+ } else {
+ // fetch queue from schedd and stash it in output_buffer.
+- Daemon schedd(DT_SCHEDD, scheddName, pool ? pool->addr() : NULL );
++ Daemon *schedd_ptr = NULL;
++ if (ad)
++ schedd_ptr = new Daemon(ad, DT_SCHEDD, pool ? pool->addr() : NULL );
++ else
++ schedd_ptr = new Daemon(DT_SCHEDD, scheddName, pool ? pool->addr() : NULL );
++ ASSERT(schedd_ptr);
++ Daemon & schedd = *schedd_ptr;
+ const char *version = schedd.version();
+ bool useFastPath = false;
+ if (version) {
+@@ -2719,9 +2725,12 @@ show_queue_buffered( const char* v1, const char* v2, const char* v3, const char*
+
+ // stash the schedd daemon object for use by process_buffer_line
+ g_cur_schedd_for_process_buffer_line = new Daemon( schedd );
++ ASSERT( g_cur_schedd_for_process_buffer_line );
++
++ delete schedd_ptr;
+
+ int fetchResult;
+- if( (fetchResult = Q.fetchQueueFromHostAndProcess( scheddAddress, attrs,
++ if( (fetchResult = Q.fetchQueueFromDaemonAndProcess( *g_cur_schedd_for_process_buffer_line, attrs,
+ process_buffer_line,
+ useFastPath,
+ &errstack)) != Q_OK) {
+@@ -2967,7 +2976,7 @@ process_buffer_line( ClassAd *job )
+ refer to the prototype of this function on the top of this file
+ */
+ static bool
+-show_queue( const char* v1, const char* v2, const char* v3, const char* v4, bool useDB )
++show_queue( const char* v1, const char* v2, const char* v3, const char* v4, const ClassAd * ad, bool useDB )
+ {
+ const char *scheddAddress;
+ const char *scheddName;
+@@ -3046,7 +3055,13 @@ show_queue( const char* v1, const char* v2, const char* v3, const char* v4, bool
+ } else {
+ // fetch queue from schedd
+ int fetchResult;
+- if( (fetchResult = Q.fetchQueueFromHost(jobs, attrs,scheddAddress, scheddVersion, &errstack) != Q_OK)) {
++ Daemon *schedd_daemon_ptr;
++ if (ad)
++ schedd_daemon_ptr = new Daemon(ad, DT_SCHEDD, pool ? pool->addr() : NULL );
++ else
++ schedd_daemon_ptr = new Daemon(DT_SCHEDD, scheddName, pool ? pool->addr() : NULL );
++ Daemon &schedd_daemon = *schedd_daemon_ptr; // NOTE(review): no matching delete is visible in this hunk -- confirm the object is freed on every path
++ if( (fetchResult = Q.fetchQueueFromDaemon(jobs, attrs, schedd_daemon, scheddVersion, &errstack)) != Q_OK) { // store the real result code, then compare, so the switch below sees Q_* values
+ // The parse + fetch failed, print out why
+ switch(fetchResult) {
+ case Q_PARSE_ERROR:
+@@ -3081,7 +3096,12 @@ show_queue( const char* v1, const char* v2, const char* v3, const char* v4, bool
+ scheddAddress, scheddMachine);
+ }
+
+- Daemon schedd_daemon(DT_SCHEDD,scheddName,pool ? pool->addr() : NULL);
++ Daemon *schedd_daemon_ptr;
++ if (ad)
++ schedd_daemon_ptr = new Daemon(ad, DT_SCHEDD, pool ? pool->addr() : NULL );
++ else
++ schedd_daemon_ptr = new Daemon(DT_SCHEDD, scheddName, pool ? pool->addr() : NULL );
++ Daemon &schedd_daemon = *schedd_daemon_ptr;
+ schedd_daemon.locate();
+
+ jobs.Open();
+@@ -3089,6 +3109,7 @@ show_queue( const char* v1, const char* v2, const char* v3, const char* v4, bool
+ doRunAnalysis( job, &schedd_daemon );
+ }
+ jobs.Close();
++ delete schedd_daemon_ptr;
+
+ if(lastUpdate) {
+ free(lastUpdate);
+diff --git a/src/condor_schedd.V6/qmgmt_receivers.cpp b/src/condor_schedd.V6/qmgmt_receivers.cpp
+index 98e1960..db6314f 100644
+--- a/src/condor_schedd.V6/qmgmt_receivers.cpp
++++ b/src/condor_schedd.V6/qmgmt_receivers.cpp
+@@ -71,7 +71,7 @@ do_Q_request(ReliSock *syscall_sock,bool &may_fork)
+ dprintf(D_SECURITY,"Calling authenticate(%s) in qmgmt_receivers\n", methods.Value());
+ }
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(syscall_sock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(syscall_sock, WRITE, &errstack, NULL) ) {
+ // Failed to authenticate
+ dprintf( D_ALWAYS, "SCHEDD: authentication failed: %s\n",
+ errstack.getFullText() );
+diff --git a/src/condor_schedd.V6/qmgr_job_updater.cpp b/src/condor_schedd.V6/qmgr_job_updater.cpp
+index f9c100a..25126dd 100644
+--- a/src/condor_schedd.V6/qmgr_job_updater.cpp
++++ b/src/condor_schedd.V6/qmgr_job_updater.cpp
+@@ -47,8 +47,10 @@ QmgrJobUpdater::QmgrJobUpdater( ClassAd* job, const char* schedd_address,
+ schedd_addr(schedd_address?strdup(schedd_address):0),
+ schedd_ver(schedd_version?strdup(schedd_version):0),
+ cluster(-1), proc(-1),
+- q_update_tid(-1)
++ q_update_tid(-1),
++ m_daemon(DT_SCHEDD, schedd_address)
+ {
++
+ if( ! is_valid_sinful(schedd_address) ) {
+ EXCEPT( "schedd_addr not specified with valid address (%s)",
+ schedd_address );
+@@ -252,7 +254,7 @@ QmgrJobUpdater::updateAttr( const char *name, const char *expr, bool updateMaste
+ if (log) {
+ flags = SHOULDLOG;
+ }
+- if( ConnectQ(schedd_addr,SHADOW_QMGMT_TIMEOUT,false,NULL,m_owner.Value(),schedd_ver) ) {
++ if( ConnectQ(m_daemon,SHADOW_QMGMT_TIMEOUT,false,NULL,m_owner.Value(),schedd_ver) ) {
+ if( SetAttribute(cluster,p,name,expr,flags) < 0 ) {
+ err_msg = "SetAttribute() failed";
+ result = FALSE;
+@@ -338,7 +340,7 @@ QmgrJobUpdater::updateJob( update_t type, SetAttributeFlags_t commit_flags )
+ job_queue_attrs->contains_anycase(name)) ) {
+
+ if( ! is_connected ) {
+- if( ! ConnectQ(schedd_addr, SHADOW_QMGMT_TIMEOUT, false, NULL, m_owner.Value(),schedd_ver) ) {
++ if( ! ConnectQ(m_daemon, SHADOW_QMGMT_TIMEOUT, false, NULL, m_owner.Value(),schedd_ver) ) {
+ return false;
+ }
+ is_connected = true;
+@@ -351,7 +353,7 @@ QmgrJobUpdater::updateJob( update_t type, SetAttributeFlags_t commit_flags )
+ m_pull_attrs->rewind();
+ while ( (name = m_pull_attrs->next()) ) {
+ if ( !is_connected ) {
+- if ( !ConnectQ( schedd_addr, SHADOW_QMGMT_TIMEOUT, true, NULL, NULL, schedd_ver ) ) {
++ if ( !ConnectQ( m_daemon, SHADOW_QMGMT_TIMEOUT, true, NULL, NULL, schedd_ver ) ) {
+ return false;
+ }
+ is_connected = true;
+@@ -392,7 +394,7 @@ QmgrJobUpdater::retrieveJobUpdates( void )
+ ProcIdToStr(cluster, proc, id_str);
+ job_ids.insert(id_str);
+
+- if ( !ConnectQ( schedd_addr, SHADOW_QMGMT_TIMEOUT, false ) ) {
++ if ( !ConnectQ( m_daemon, SHADOW_QMGMT_TIMEOUT, false ) ) {
+ return false;
+ }
+ if ( GetDirtyAttributes( cluster, proc, &updates ) < 0 ) {
+diff --git a/src/condor_schedd.V6/qmgr_job_updater.h b/src/condor_schedd.V6/qmgr_job_updater.h
+index e487688..ad86fe2 100644
+--- a/src/condor_schedd.V6/qmgr_job_updater.h
++++ b/src/condor_schedd.V6/qmgr_job_updater.h
+@@ -47,7 +47,7 @@ class QmgrJobUpdater : public Service
+ {
+ public:
+ QmgrJobUpdater( ClassAd* job_a, const char*schedd_address, char const *schedd_version);
+- QmgrJobUpdater( ) : common_job_queue_attrs(0), hold_job_queue_attrs(0), evict_job_queue_attrs(0), remove_job_queue_attrs(0), requeue_job_queue_attrs(0), terminate_job_queue_attrs(0), checkpoint_job_queue_attrs(0), x509_job_queue_attrs(0), m_pull_attrs(0), job_ad(0), schedd_addr(0), schedd_ver(0), cluster(-1), proc(-1), q_update_tid(-1) {}
++ QmgrJobUpdater( ) : common_job_queue_attrs(0), hold_job_queue_attrs(0), evict_job_queue_attrs(0), remove_job_queue_attrs(0), requeue_job_queue_attrs(0), terminate_job_queue_attrs(0), checkpoint_job_queue_attrs(0), x509_job_queue_attrs(0), m_pull_attrs(0), job_ad(0), schedd_addr(0), schedd_ver(0), cluster(-1), proc(-1), q_update_tid(-1), m_daemon(DT_SCHEDD, NULL) {}
+ virtual ~QmgrJobUpdater();
+
+ virtual void startUpdateTimer( void );
+@@ -148,6 +148,8 @@ private:
+ int proc;
+
+ int q_update_tid;
++
++ Daemon m_daemon;
+ };
+
+ // usefull if you don't want to update the job queue
+diff --git a/src/condor_schedd.V6/qmgr_lib_support.cpp b/src/condor_schedd.V6/qmgr_lib_support.cpp
+index 64bfffd..4afd1a6 100644
+--- a/src/condor_schedd.V6/qmgr_lib_support.cpp
++++ b/src/condor_schedd.V6/qmgr_lib_support.cpp
+@@ -34,8 +34,9 @@ ReliSock *qmgmt_sock = NULL;
+ static Qmgr_connection connection;
+
+ Qmgr_connection *
+-ConnectQ(const char *qmgr_location, int timeout, bool read_only, CondorError* errstack, const char *effective_owner, const char* schedd_version_str )
++ConnectQ(Daemon &d, int timeout, bool read_only, CondorError* errstack, const char *effective_owner, const char* schedd_version_str )
+ {
++
+ int rval, ok;
+ int cmd = read_only ? QMGMT_READ_CMD : QMGMT_WRITE_CMD;
+
+@@ -54,15 +55,10 @@ ConnectQ(const char *qmgr_location, int timeout, bool read_only, CondorError* er
+ }
+
+ // no connection active as of now; create a new one
+- Daemon d( DT_SCHEDD, qmgr_location );
++ dprintf(D_HOSTNAME, "Hostname of new connection: %s\n", d.fullHostname() ? d.fullHostname() : "(unknown)"); // NOTE(review): guards against a NULL hostname reaching %s -- confirm fullHostname() can return NULL
+ if( ! d.locate() ) {
+ ok = FALSE;
+- if( qmgr_location ) {
+- dprintf( D_ALWAYS, "Can't find address of queue manager %s\n",
+- qmgr_location );
+- } else {
+- dprintf( D_ALWAYS, "Can't find address of local queue manager\n" );
+- }
++ dprintf( D_ALWAYS, "Can't find address of queue manager\n" );
+ } else {
+ // QMGMT_WRITE_CMD didn't exist before 7.5.0, so use QMGMT_READ_CMD
+ // when talking to older schedds
+@@ -104,7 +100,7 @@ ConnectQ(const char *qmgr_location, int timeout, bool read_only, CondorError* er
+ // the connection, because this command is registered with
+ // force_authentication=true on the server side.
+ if( cmd == QMGMT_WRITE_CMD && !qmgmt_sock->triedAuthentication()) {
+- if( !SecMan::authenticate_sock(qmgmt_sock, CLIENT_PERM, errstack_select ) )
++ if( !SecMan::authenticate_sock(qmgmt_sock, CLIENT_PERM, errstack_select, d.fullHostname()) )
+ {
+ delete qmgmt_sock;
+ qmgmt_sock = NULL;
+@@ -155,7 +151,7 @@ ConnectQ(const char *qmgr_location, int timeout, bool read_only, CondorError* er
+ }
+
+ if ( !read_only ) {
+- if (!SecMan::authenticate_sock(qmgmt_sock, CLIENT_PERM, errstack_select)) {
++ if (!SecMan::authenticate_sock(qmgmt_sock, CLIENT_PERM, errstack_select, d.fullHostname())) {
+ delete qmgmt_sock;
+ qmgmt_sock = NULL;
+ if (!errstack) {
+diff --git a/src/condor_schedd.V6/schedd.cpp b/src/condor_schedd.V6/schedd.cpp
+index b855407..6731e6c 100644
+--- a/src/condor_schedd.V6/schedd.cpp
++++ b/src/condor_schedd.V6/schedd.cpp
+@@ -3434,7 +3434,7 @@ Scheduler::spoolJobFiles(int mode, Stream* s)
+
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+@@ -3690,7 +3690,7 @@ Scheduler::updateGSICred(int cmd, Stream* s)
+
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+@@ -3901,7 +3901,7 @@ Scheduler::actOnJobs(int, Stream* s)
+ rsock->decode();
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+@@ -12104,7 +12104,7 @@ Scheduler::get_job_connect_info_handler_implementation(int, Stream* s) {
+ // force authentication
+ if( !sock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(sock, WRITE, &errstack) ||
++ if( ! SecMan::authenticate_sock(sock, WRITE, &errstack, NULL) ||
+ ! sock->getFullyQualifiedUser() )
+ {
+ dprintf( D_ALWAYS,
+@@ -13145,7 +13145,7 @@ Scheduler::claimLocalStartd()
+ CondorQuery query(STARTD_AD);
+ QueryResult q;
+ ClassAdList result;
+- q = query.fetchAds(result, startd_addr, &errstack);
++ q = query.fetchAds(result, startd, &errstack);
+ if ( q != Q_OK ) {
+ dprintf(D_FULLDEBUG,
+ "ERROR: could not fetch ads from local startd : %s (%s)\n",
+@@ -13654,7 +13654,7 @@ Scheduler::RecycleShadow(int /*cmd*/, Stream *stream)
+ sock->decode();
+ if( !sock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(sock, WRITE, &errstack) ||
++ if( ! SecMan::authenticate_sock(sock, WRITE, &errstack, NULL) ||
+ ! sock->getFullyQualifiedUser() )
+ {
+ dprintf( D_ALWAYS,
+@@ -13864,7 +13864,7 @@ Scheduler::clear_dirty_job_attrs_handler(int /*cmd*/, Stream *stream)
+ sock->decode();
+ if( !sock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(sock, WRITE, &errstack) ||
++ if( ! SecMan::authenticate_sock(sock, WRITE, &errstack, NULL) ||
+ ! sock->getFullyQualifiedUser() )
+ {
+ dprintf( D_ALWAYS,
+diff --git a/src/condor_schedd.V6/schedd_td.cpp b/src/condor_schedd.V6/schedd_td.cpp
+index 3e30f27..a9b0e4b 100644
+--- a/src/condor_schedd.V6/schedd_td.cpp
++++ b/src/condor_schedd.V6/schedd_td.cpp
+@@ -76,7 +76,7 @@ Scheduler::requestSandboxLocation(int mode, Stream* s)
+
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_schedd.V6/tdman.cpp b/src/condor_schedd.V6/tdman.cpp
+index 53c6000..eb1e205 100644
+--- a/src/condor_schedd.V6/tdman.cpp
++++ b/src/condor_schedd.V6/tdman.cpp
+@@ -869,7 +869,7 @@ TDMan::transferd_registration(int cmd, Stream *sock)
+ ///////////////////////////////////////////////////////////////
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_status.V6/status.cpp b/src/condor_status.V6/status.cpp
+index dea2c92..187e843 100644
+--- a/src/condor_status.V6/status.cpp
++++ b/src/condor_status.V6/status.cpp
+@@ -442,14 +442,19 @@ main (int argc, char *argv[])
+ // subsystem that corresponds to a daemon (above).
+ // Here 'addr' represents either the host:port of requested pool, or
+ // alternatively the host:port of daemon associated with requested subsystem (direct mode)
+- q = query->fetchAds (result, addr, &errstack);
++
++ // If we are querying the same collector as before, reuse that object.
++ // This is important for hostname resolution.
++ if (!direct)
++ q = query->fetchAds (result, *pool, &errstack);
++ else
++ q = query->fetchAds (result, addr, &errstack);
+ } else {
+ // otherwise obtain list of collectors and submit query that way
+ CollectorList * collectors = CollectorList::create();
+ q = collectors->query (*query, result, &errstack);
+ delete collectors;
+ }
+-
+
+ // if any error was encountered during the query, report it and exit
+ if (Q_OK != q) {
+diff --git a/src/condor_submit.V6/submit.cpp b/src/condor_submit.V6/submit.cpp
+index 76bbabf..f5e118b 100644
+--- a/src/condor_submit.V6/submit.cpp
++++ b/src/condor_submit.V6/submit.cpp
+@@ -6237,7 +6237,8 @@ connect_to_the_schedd()
+ setupAuthentication();
+
+ CondorError errstack;
+- if( ConnectQ(MySchedd->addr(), 0 /* default */, false /* default */, &errstack, NULL, MySchedd->version() ) == 0 ) {
++ ASSERT(MySchedd);
++ if( ConnectQ(*MySchedd, 0 /* default */, false /* default */, &errstack, NULL, MySchedd->version() ) == 0 ) {
+ if( ScheddName ) {
+ fprintf( stderr,
+ "\nERROR: Failed to connect to queue manager %s\n%s\n",
+@@ -7202,7 +7203,8 @@ DoCleanup(int,int,const char*)
+ // DoCleanup(). This lead to infinite recursion which is bad.
+ ClusterCreated = 0;
+ if (!ActiveQueueConnection) {
+- ActiveQueueConnection = (ConnectQ(MySchedd->addr()) != 0);
++ ASSERT( MySchedd );
++ ActiveQueueConnection = (ConnectQ(*MySchedd) != 0);
+ }
+ if (ActiveQueueConnection) {
+ // Call DestroyCluster() now in an attempt to get the schedd
+diff --git a/src/condor_tools/preen.cpp b/src/condor_tools/preen.cpp
+index 57fcd04..e2f9774 100644
+--- a/src/condor_tools/preen.cpp
++++ b/src/condor_tools/preen.cpp
+@@ -356,7 +356,8 @@ check_spool_dir()
+ well_known_list.append( ".pgpass" );
+
+ // connect to the Q manager
+- if (!(qmgr = ConnectQ (0))) {
++ Daemon d(DT_SCHEDD, 0);
++ if (!(qmgr = ConnectQ (d))) {
+ dprintf( D_ALWAYS, "Not cleaning spool directory: Can't contact schedd\n" );
+ return;
+ }
+diff --git a/src/condor_tools/qedit.cpp b/src/condor_tools/qedit.cpp
+index e36d844..61d1b63 100644
+--- a/src/condor_tools/qedit.cpp
++++ b/src/condor_tools/qedit.cpp
+@@ -131,7 +131,7 @@ main(int argc, char *argv[])
+ }
+
+ // Open job queue
+- q = ConnectQ( schedd.addr(), 0, false, NULL, NULL, schedd.version() );
++ q = ConnectQ( schedd, 0, false, NULL, NULL, schedd.version() );
+ if( !q ) {
+ fprintf( stderr, "Failed to connect to queue manager %s\n",
+ schedd.addr() );
+diff --git a/src/condor_tools/tool.cpp b/src/condor_tools/tool.cpp
+index 5e63dc7..8b3ba37 100644
+--- a/src/condor_tools/tool.cpp
++++ b/src/condor_tools/tool.cpp
+@@ -1167,8 +1167,8 @@ resolveNames( DaemonList* daemon_list, StringList* name_list )
+ }
+
+
+- if (pool_addr) {
+- q_result = query.fetchAds(ads, pool_addr, &errstack);
++ if (pool) {
++ q_result = query.fetchAds(ads, *pool, &errstack);
+ } else {
+ CollectorList * collectors = CollectorList::create();
+ q_result = collectors->query (query, ads);
+diff --git a/src/condor_transferd/td_init.cpp b/src/condor_transferd/td_init.cpp
+index 1fccebd..f2330e1 100644
+--- a/src/condor_transferd/td_init.cpp
++++ b/src/condor_transferd/td_init.cpp
+@@ -277,7 +277,7 @@ TransferD::setup_transfer_request_handler(int /*cmd*/, Stream *sock)
+ ///////////////////////////////////////////////////////////////
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_transferd/td_read_files.cpp b/src/condor_transferd/td_read_files.cpp
+index a6c7f87..4febc43 100644
+--- a/src/condor_transferd/td_read_files.cpp
++++ b/src/condor_transferd/td_read_files.cpp
+@@ -67,7 +67,7 @@ TransferD::read_files_handler(int cmd, Stream *sock)
+ /////////////////////////////////////////////////////////////////////////
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_transferd/td_write_files.cpp b/src/condor_transferd/td_write_files.cpp
+index 412a552..572cc79 100644
+--- a/src/condor_transferd/td_write_files.cpp
++++ b/src/condor_transferd/td_write_files.cpp
+@@ -67,7 +67,7 @@ TransferD::write_files_handler(int cmd, Stream *sock)
+ /////////////////////////////////////////////////////////////////////////
+ if( ! rsock->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_utils/classad_command_util.cpp b/src/condor_utils/classad_command_util.cpp
+index 56d7ddb..1ae11a8 100644
+--- a/src/condor_utils/classad_command_util.cpp
++++ b/src/condor_utils/classad_command_util.cpp
+@@ -92,7 +92,7 @@ getCmdFromReliSock( ReliSock* s, ClassAd* ad, bool force_auth )
+ s->decode();
+ if( force_auth && ! s->triedAuthentication() ) {
+ CondorError errstack;
+- if( ! SecMan::authenticate_sock(s, WRITE, &errstack) ) {
++ if( ! SecMan::authenticate_sock(s, WRITE, &errstack, NULL) ) {
+ // we failed to authenticate, we should bail out now
+ // since we don't know what user is trying to perform
+ // this action.
+diff --git a/src/condor_utils/condor_q.cpp b/src/condor_utils/condor_q.cpp
+index 886b664..c540bbd 100644
+--- a/src/condor_utils/condor_q.cpp
++++ b/src/condor_utils/condor_q.cpp
+@@ -28,6 +28,7 @@
+ #include "CondorError.h"
+ #include "condor_classad.h"
+ #include "quill_enums.h"
++#include "daemon.h"
+
+ #ifdef HAVE_EXT_POSTGRESQL
+ #include "pgsqldatabase.h"
+@@ -230,7 +231,8 @@ fetchQueue (ClassAdList &list, StringList &attrs, ClassAd *ad, CondorError* errs
+ if (ad == 0)
+ {
+ // local case
+- if( !(qmgr = ConnectQ( 0, connect_timeout, true, errstack)) ) {
++ Daemon d(DT_SCHEDD, 0, 0);
++ if( !(qmgr = ConnectQ( d, connect_timeout, true, errstack)) ) {
+ errstack->push("TEST", 0, "FOO");
+ return Q_SCHEDD_COMMUNICATION_ERROR;
+ }
+@@ -241,8 +243,9 @@ fetchQueue (ClassAdList &list, StringList &attrs, ClassAd *ad, CondorError* errs
+ // remote case to handle condor_globalq
+ if (!ad->LookupString (ATTR_SCHEDD_IP_ADDR, scheddString))
+ return Q_NO_SCHEDD_IP_ADDR;
++ Daemon d(ad, DT_SCHEDD, NULL);
+
+- if( !(qmgr = ConnectQ( scheddString, connect_timeout, true, errstack)) )
++ if( !(qmgr = ConnectQ( d, connect_timeout, true, errstack)) )
+ return Q_SCHEDD_COMMUNICATION_ERROR;
+
+ }
+@@ -255,7 +258,7 @@ fetchQueue (ClassAdList &list, StringList &attrs, ClassAd *ad, CondorError* errs
+ }
+
+ int CondorQ::
+-fetchQueueFromHost (ClassAdList &list, StringList &attrs, const char *host, char const *schedd_version, CondorError* errstack)
++fetchQueueFromDaemon (ClassAdList &list, StringList &attrs, Daemon &daemon, char const *schedd_version, CondorError* errstack)
+ {
+ Qmgr_connection *qmgr;
+ ExprTree *tree;
+@@ -276,7 +279,7 @@ fetchQueueFromHost (ClassAdList &list, StringList &attrs, const char *host, char
+ optimal. :^).
+ */
+ init(); // needed to get default connect_timeout
+- if( !(qmgr = ConnectQ( host, connect_timeout, true, errstack)) )
++ if( !(qmgr = ConnectQ( daemon, connect_timeout, true, errstack)) )
+ return Q_SCHEDD_COMMUNICATION_ERROR;
+
+ bool useFastPath = false;
+@@ -353,7 +356,7 @@ CondorQ::fetchQueueFromDB (ClassAdList &list,
+ }
+
+ int
+-CondorQ::fetchQueueFromHostAndProcess ( const char *host,
++CondorQ::fetchQueueFromDaemonAndProcess ( Daemon &daemon,
+ StringList &attrs,
+ process_function process_func,
+ bool useFastPath,
+@@ -378,7 +381,7 @@ CondorQ::fetchQueueFromHostAndProcess ( const char *host,
+ optimal. :^).
+ */
+ init(); // needed to get default connect_timeout
+- if( !(qmgr = ConnectQ( host, connect_timeout, true, errstack)) )
++ if( !(qmgr = ConnectQ( daemon, connect_timeout, true, errstack)) )
+ return Q_SCHEDD_COMMUNICATION_ERROR;
+
+ // get the ads and filter them
+diff --git a/src/condor_utils/condor_q.h b/src/condor_utils/condor_q.h
+index 7f6a620..ccd9196 100644
+--- a/src/condor_utils/condor_q.h
++++ b/src/condor_utils/condor_q.h
+@@ -23,6 +23,7 @@
+ #include "condor_common.h"
+ #include "generic_query.h"
+ #include "CondorError.h"
++#include "daemon.h"
+
+ #define MAXOWNERLEN 20
+ #define MAXSCHEDDLEN 255
+@@ -90,8 +91,8 @@ class CondorQ
+ // which pass the criterion specified by the constraints; default is
+ // from the local schedd
+ int fetchQueue (ClassAdList &, StringList &attrs, ClassAd * = 0, CondorError* errstack = 0);
+- int fetchQueueFromHost (ClassAdList &, StringList &attrs, const char * = 0, char const *schedd_version = 0,CondorError* errstack = 0);
+- int fetchQueueFromHostAndProcess ( const char *, StringList &attrs, process_function process_func, bool useFastPath, CondorError* errstack = 0);
++ int fetchQueueFromDaemon (ClassAdList &, StringList &attrs, Daemon &, char const *schedd_version = 0,CondorError* errstack = 0);
++ int fetchQueueFromDaemonAndProcess ( Daemon &, StringList &attrs, process_function process_func, bool useFastPath, CondorError* errstack = 0);
+
+ // fetch the job ads from database
+ int fetchQueueFromDB (ClassAdList &, char *&lastUpdate, const char * = 0, CondorError* errstack = 0);
+diff --git a/src/condor_utils/condor_query.cpp b/src/condor_utils/condor_query.cpp
+index 95bc78a..acc6201 100644
+--- a/src/condor_utils/condor_query.cpp
++++ b/src/condor_utils/condor_query.cpp
+@@ -386,10 +386,6 @@ addORConstraint (const char *value)
+ QueryResult CondorQuery::
+ fetchAds (ClassAdList &adList, const char *poolName, CondorError* errstack)
+ {
+- Sock* sock;
+- int more;
+- QueryResult result;
+- ClassAd queryAd(extraAttrs), *ad;
+
+ if ( !poolName ) {
+ return Q_NO_COLLECTOR_HOST;
+@@ -402,7 +398,16 @@ fetchAds (ClassAdList &adList, const char *poolName, CondorError* errstack)
+ return Q_NO_COLLECTOR_HOST;
+ }
+
++ return fetchAds(adList, my_collector, errstack);
++}
+
++QueryResult CondorQuery::
++fetchAds (ClassAdList &adList, Daemon &my_collector, CondorError* errstack)
++{
++ Sock* sock;
++ int more;
++ QueryResult result;
++ ClassAd queryAd(extraAttrs), *ad;
+ // make the query ad
+ result = getQueryAd (queryAd);
+ if (result != Q_OK) return result;
+diff --git a/src/condor_utils/condor_query.h b/src/condor_utils/condor_query.h
+index 7e58eef..9fedcad 100644
+--- a/src/condor_utils/condor_query.h
++++ b/src/condor_utils/condor_query.h
+@@ -156,6 +156,7 @@ class CondorQuery
+
+ // fetch from collector
+ QueryResult fetchAds (ClassAdList &adList, const char * pool, CondorError* errstack = NULL);
++ QueryResult fetchAds (ClassAdList &adList, Daemon &daemon, CondorError* errstack = NULL);
+
+
+ // filter list of ads; arg1 is 'in', arg2 is 'out'
+diff --git a/src/condor_utils/ipv6_hostname.cpp b/src/condor_utils/ipv6_hostname.cpp
+index cfefb4b..3666bd4 100644
+--- a/src/condor_utils/ipv6_hostname.cpp
++++ b/src/condor_utils/ipv6_hostname.cpp
+@@ -197,10 +197,13 @@ int get_fqdn_and_ip_from_hostname(const MyString& hostname,
+ MyString ret;
+ condor_sockaddr ret_addr;
+ bool found_ip = false;
++ bool use_given_name = false;
+
+ // if the hostname contains dot, hostname is assumed to be full hostname
+ if (hostname.FindChar('.') != -1) {
+ ret = hostname;
++ fqdn = hostname;
++ use_given_name = true;
+ }
+
+ if (nodns_enabled()) {
+@@ -219,7 +222,9 @@ int get_fqdn_and_ip_from_hostname(const MyString& hostname,
+
+ while (addrinfo* info = ai.next()) {
+ if (info->ai_canonname) {
+- fqdn = info->ai_canonname;
++ dprintf(D_HOSTNAME, "Found canon addr: %s\n", info->ai_canonname);
++ if (!use_given_name)
++ fqdn = info->ai_canonname;
+ addr = condor_sockaddr(info->ai_addr);
+ return 1;
+ }
+diff --git a/src/condor_who/who.cpp b/src/condor_who/who.cpp
+index 08f19b6..870668a 100644
+--- a/src/condor_who/who.cpp
++++ b/src/condor_who/who.cpp
+@@ -681,7 +681,7 @@ main( int argc, char *argv[] )
+ ClassAdList result;
+ if (addr || App.diagnostic) {
+ CondorError errstack;
+- QueryResult qr = query->fetchAds (result, addr, &errstack);
++ QueryResult qr = dae->locate() ? query->fetchAds (result, *dae, &errstack) : query->fetchAds (result, addr, &errstack);
+ if (Q_OK != qr) {
+ fprintf( stderr, "Error: %s\n", getStrQueryResult(qr) );
+ fprintf( stderr, "%s\n", errstack.getFullText(true) );
diff --git a/condor-gahp.patch b/condor-gahp.patch
new file mode 100644
index 0000000..596ff2d
--- /dev/null
+++ b/condor-gahp.patch
@@ -0,0 +1,34 @@
+diff --git a/src/condor_gridmanager/gahp-client.cpp b/src/condor_gridmanager/gahp-client.cpp
+index 48c6b15..b0a44d8 100644
+--- a/src/condor_gridmanager/gahp-client.cpp
++++ b/src/condor_gridmanager/gahp-client.cpp
+@@ -703,6 +703,16 @@ GahpServer::Startup()
+ free( tmp_char );
+ }
+
++ // GLOBUS_LOCATION needs to be set for the blahp; otherwise, it defaults to /opt/globus,
++ // which is likely never correct
++ tmp_char = param("GLOBUS_LOCATION");
++ if ( tmp_char ) {
++ newenv.SetEnv( "GLOBUS_LOCATION", tmp_char );
++ free( tmp_char );
++ } else if (getenv("GLOBUS_LOCATION") == NULL) {
++ newenv.SetEnv( "GLOBUS_LOCATION", "/usr" );
++ }
++
+ // For amazon ec2 ca authentication
+ tmp_char = param("SOAP_SSL_CA_FILE");
+ if( tmp_char ) {
+diff --git a/src/condor_gridmanager/infnbatchjob.cpp b/src/condor_gridmanager/infnbatchjob.cpp
+index b76bc4f..f6751a1 100644
+--- a/src/condor_gridmanager/infnbatchjob.cpp
++++ b/src/condor_gridmanager/infnbatchjob.cpp
+@@ -224,7 +224,7 @@ INFNBatchJob::INFNBatchJob( ClassAd *classad )
+ free( gahp_path );
+
+ myResource = INFNBatchResource::FindOrCreateResource( batchType,
+- gahp_args.GetArg(0) );
++ (gahp_args.Count() > 0) ? gahp_args.GetArg(0) : "localhost" );
+ myResource->RegisterJob( this );
+ if ( remoteJobId ) {
+ myResource->AlreadySubmitted( this );
diff --git a/condor.spec b/condor.spec
index 9a8e8cd..0ef0fe5 100644
--- a/condor.spec
+++ b/condor.spec
@@ -1,4 +1,6 @@
-%define tarball_version 7.7.5
+%define tarball_version 7.9.1
+
+%define _default_patch_fuzz 2
# Things for F15 or later
%if 0%{?fedora} >= 15
@@ -10,15 +12,19 @@
%define qmf 1
%else
%define deltacloud 0
-%define aviary 0
+%define aviary 1
%define plumage 0
%define systemd 0
%define cgroups 0
%define qmf 1
%endif
+%if 0%{?rhel} >= 6
+%define cgroups 1
+%endif
+
# Things not turned on, or don't have Fedora packages yet
-%define blahp 0
+%define blahp 1
%define glexec 1
# These flags are meant for developers; it allows one to build Condor
@@ -26,14 +32,14 @@
%define git_build 1
# If building with git tarball, Fedora requests us to record the rev. Use:
# git log -1 --pretty=format:'%h'
-%define git_rev c42c744
+%define git_rev ceb6a0a
Summary: Condor: High Throughput Computing
Name: condor
-Version: 7.7.5
-%define condor_base_release 0.3
+Version: 7.9.1
+%define condor_base_release 0.2
%if %git_build
- %define condor_release %condor_base_release.%{git_rev}git
+ %define condor_release %condor_base_release.%{git_rev}.git
%else
%define condor_release %condor_base_release
%endif
@@ -92,7 +98,15 @@ Source3: condor.service
Patch0: condor_config.generic.patch
Patch1: chkconfig_off.patch
-Patch8: hcc_config.patch
+Patch2: hcc_config.patch
+Patch3: wso2-axis2.patch
+Patch4: condor_pid_namespaces_v7.patch
+Patch5: condor_partial_defrag_v2.patch
+Patch6: cgroups_noswap.patch
+Patch7: cgroup_reset_stats.patch
+Patch8: condor-gahp.patch
+Patch9: cgahp_scaling.patch
+Patch10: condor-1605-v2.patch
BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
@@ -161,6 +175,7 @@ Requires: libcgroup >= 0.37
%endif
%if %blahp
+BuildRequires: blahp >= 1.16.1
Requires: blahp >= 1.16.1
%endif
%if %glexec
@@ -176,9 +191,6 @@ Requires: python >= 2.2
Requires: condor-classads = %{version}-%{release}
Requires: condor-procd = %{version}-%{release}
-%if %blahp
-Requires: blahp >= 1.16.1
-%endif
# libcgroup < 0.37 has a bug that invalidates our accounting.
Requires: libcgroup >= 0.37
@@ -355,9 +367,17 @@ exit 0
%setup -q -n %{name}-%{tarball_version}
%endif
-#%patch0 -p1
-
-#%patch8 -p1
+%patch0 -p1
+%patch1 -p1
+%patch2 -p1
+%patch3 -p0
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
# fix errant execute permissions
find src -perm /a+x -type f -name "*.[Cch]" -exec chmod a-x {} \;
@@ -396,6 +416,12 @@ find src -perm /a+x -type f -name "*.[Cch]" -exec chmod a-x {} \;
-DWITH_MANAGEMENT:BOOL=FALSE \
%endif
-DWANT_FULL_DEPLOYMENT:BOOL=TRUE \
+%if %blahp
+ -DBLAHP_FOUND=/usr/libexec/BLClient \
+ -DWITH_BLAHP:BOOL=TRUE \
+%else
+ -DWITH_BLAHP:BOOL=FALSE \
+%endif
%if %glexec
-DWANT_GLEXEC:BOOL=TRUE \
%else
@@ -413,6 +439,7 @@ find src -perm /a+x -type f -name "*.[Cch]" -exec chmod a-x {} \;
%endif
make %{?_smp_mflags}
+#make
%install
@@ -436,7 +463,7 @@ populate %_sysconfdir/condor %{buildroot}/%{_usr}/lib/condor_ssh_to_job_sshd_con
populate %{_datadir}/condor %{buildroot}/%{_usr}/lib/*
# Except for the shared libs
populate %{_libdir}/ %{buildroot}/%{_datadir}/condor/libclassad.so*
-populate %{_libdir}/ %{buildroot}/%{_datadir}/condor/libcondor_utils.so
+populate %{_libdir}/ %{buildroot}/%{_datadir}/condor/libcondor_utils*.so
rm -f %{buildroot}/%{_datadir}/condor/libclassad.a
%if %aviary || %qmf
@@ -528,7 +555,6 @@ rm %{buildroot}/%{_mandir}/man1/condor_configure.1
# not packaging legacy cruft
#rm %{buildroot}/%{_mandir}/man1/condor_master_off.1
#rm %{buildroot}/%{_mandir}/man1/condor_reconfig_schedd.1
-rm %{buildroot}/%{_mandir}/man1/condor_convert_history.1
# not packaging quill bits
rm %{buildroot}/%{_mandir}/man1/condor_load_history.1
@@ -639,8 +665,6 @@ rm -rf %{buildroot}
# dep problem in 7.7.3
#%_datadir/condor/Condor.pm
%_datadir/condor/scimark2lib.jar
-%_datadir/condor/gt4-gahp.jar
-%_datadir/condor/gt42-gahp.jar
%dir %_sysconfdir/condor/config.d/
%_sysconfdir/condor/config.d/00personal_condor.config
%_sysconfdir/condor/condor_ssh_to_job_sshd_config_template
@@ -649,7 +673,6 @@ rm -rf %{buildroot}
%_libexecdir/condor/condor_ssh
%_libexecdir/condor/sshd.sh
%_libexecdir/condor/condor_job_router
-%_libexecdir/condor/gridftp_wrapper.sh
%if %glexec
%_libexecdir/condor/condor_glexec_setup
%_libexecdir/condor/condor_glexec_run
@@ -658,6 +681,9 @@ rm -rf %{buildroot}
%_libexecdir/condor/condor_glexec_cleanup
%_libexecdir/condor/condor_glexec_kill
%endif
+%if %blahp
+%_libexecdir/condor/glite/bin/*
+%endif
%_libexecdir/condor/condor_limits_wrapper.sh
%_libexecdir/condor/condor_rooster
%_libexecdir/condor/condor_ssh_to_job_shell_setup
@@ -713,8 +739,10 @@ rm -rf %{buildroot}
%_mandir/man1/condor_glidein.1.gz
%_mandir/man1/condor_continue.1.gz
%_mandir/man1/condor_suspend.1.gz
+%_mandir/man1/condor_gather_info.1.gz
+%_mandir/man1/condor_router_rm.1.gz
# bin/condor is a link for checkpoint, reschedule, vacate
-%_libdir/libcondor_utils.so
+%_libdir/libcondor_utils*.so
#%_bindir/condor
%_bindir/condor_submit_dag
%_bindir/condor_prio
@@ -753,6 +781,7 @@ rm -rf %{buildroot}
%_bindir/condor_suspend
%_bindir/condor_test_match
%_bindir/condor_glidein
+%_bindir/condor_who
# sbin/condor is a link for master_off, off, on, reconfig,
# reconfig_schedd, restart
#%_sbindir/condor
@@ -784,9 +813,9 @@ rm -rf %{buildroot}
%_sbindir/condor_gridshell
%_sbindir/gahp_server
%_sbindir/grid_monitor.sh
+%_sbindir/grid_monitor
%_sbindir/nordugrid_gahp
-%_sbindir/gt4_gahp
-%_sbindir/gt42_gahp
+%_sbindir/remote_gahp
%defattr(-,condor,condor,-)
%dir %_var/lib/condor/
%dir %_var/lib/condor/execute/
@@ -797,12 +826,14 @@ rm -rf %{buildroot}
%ghost %dir %_var/run/condor/
%else
%dir %_var/lock/condor
+%dir %_var/lock/condor/local
%dir %_var/run/condor
%endif
-# For Condor 7.7.5
%_bindir/condor_drain
%_libexecdir/condor/condor_defrag
+%_datadir/condor/libcondorapi.so
+%_libexecdir/condor/interactive.sub
%files procd
%_sbindir/condor_procd
@@ -838,6 +869,7 @@ rm -rf %{buildroot}
%_sysconfdir/condor/config.d/61aviary.config
%dir %_libdir/condor/plugins
%_libdir/condor/plugins/AviaryScheddPlugin-plugin.so
+%_libdir/condor/plugins/AviaryLocatorPlugin-plugin.so
%_sbindir/aviary_query_server
%dir %_datadir/condor/aviary
%_datadir/condor/aviary/jobcontrol.py*
@@ -845,6 +877,7 @@ rm -rf %{buildroot}
%_datadir/condor/aviary/submissions.py*
%_datadir/condor/aviary/submit.py*
%_datadir/condor/aviary/setattr.py*
+%_datadir/condor/aviary/jobinventory.py*
%dir %_datadir/condor/aviary/dag
%_datadir/condor/aviary/dag/diamond.dag
%_datadir/condor/aviary/dag/dag-submit.py*
@@ -853,6 +886,8 @@ rm -rf %{buildroot}
%_datadir/condor/aviary/module/aviary/util.py*
%_datadir/condor/aviary/module/aviary/https.py*
%_datadir/condor/aviary/module/aviary/__init__.py*
+%_datadir/condor/aviary/locator.py*
+%_datadir/condor/aviary/submission_ids.py*
%_datadir/condor/aviary/README
%defattr(-,condor,condor,-)
%dir %_var/lib/condor/aviary
@@ -870,6 +905,11 @@ rm -rf %{buildroot}
%_var/lib/condor/aviary/services/query/aviary-common.xsd
%_var/lib/condor/aviary/services/query/aviary-query.xsd
%_var/lib/condor/aviary/services/query/aviary-query.wsdl
+%_var/lib/condor/aviary/services/locator/aviary-common.xsd
+%_var/lib/condor/aviary/services/locator/aviary-locator.wsdl
+%_var/lib/condor/aviary/services/locator/aviary-locator.xsd
+%_var/lib/condor/aviary/services/locator/services.xml
+%_var/lib/condor/aviary/services/locator/libaviary_locator_axis.so
%endif
%if %plumage
@@ -898,6 +938,7 @@ rm -rf %{buildroot}
%doc LICENSE-2.0.txt NOTICE.txt
%_sbindir/condor_vm-gahp
#%_sbindir/condor_vm_vmware.pl
+%_sbindir/condor_vm_vmware
#%_sbindir/condor_vm_xen.sh
%_libexecdir/condor/libvirt_simple_script.awk
@@ -913,8 +954,7 @@ rm -rf %{buildroot}
%files classads
%defattr(-,root,root,-)
%doc LICENSE-2.0.txt NOTICE.txt
-%_libdir/libclassad.so.%{version}
-%_libdir/libclassad.so.2
+%_libdir/libclassad.so.*
#################
%files classads-devel
@@ -954,6 +994,7 @@ rm -rf %{buildroot}
%_includedir/classad/xmlLexer.h
%_includedir/classad/xmlSink.h
%_includedir/classad/xmlSource.h
+%_includedir/classad/classadCache.h
%if %systemd
%post
@@ -971,10 +1012,9 @@ fi
%postun
/bin/systemctl daemon-reload >/dev/null 2>&1 || :
-if [ $1 -ge 1 ] ; then
- # Package upgrade, not uninstall
- /bin/systemctl try-restart condor.service >/dev/null 2>&1 || :
-fi
+# Note we don't try to restart - Condor will automatically notice the
+# binary has changed and do a graceful or peaceful restart, based on its
+# configuration.
%triggerun -- condor < 7.7.0-0.5
@@ -1001,13 +1041,60 @@ fi
%postun -n condor
-if [ "$1" -ge "1" ]; then
- /sbin/service condor condrestart >/dev/null 2>&1 || :
-fi
+# Note we don't try to restart - Condor will automatically notice the
+# binary has changed and do a graceful or peaceful restart, based on its
+# configuration.
/sbin/ldconfig
%endif
%changelog
+* Fri Jul 13 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.1-0.2.013069b.git
+- Hunt down segfault bug.
+
+* Fri Jul 13 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.1-0.1.013069b.git
+- Update to latest master.
+
+* Tue Jun 19 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.14.888a81cgit
+- Fix DNS-based hostname checks for GSI.
+- Add the user lock directory to the file listing.
+
+* Sun Jun 17 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.13.888a81cgit
+- Patch for C-GAHP client scalability.
+
+* Fri Jun 15 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.12.888a81cgit
+- Fix re-acquisition of routed jobs on JR restart.
+- Allow DNS-based hostname checks for GSI.
+- Allow the queue super-user to impersonate any other user.
+
+* Wed Jun 2 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.11.888a81cgit
+- Fix proxy handling for Condor-C submissions.
+
+* Wed May 30 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.10.888a81cgit
+- Fix blahp segfault and GLOBUS_LOCATION.
+- Allow a 2-schedd setup for JobRouter.
+
+* Mon May 28 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.8.257bc70git
+- Re-enable blahp
+
+* Wed May 17 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.7.257bc70git
+- Fix resetting of cgroup statistics.
+
+* Wed May 16 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.6.257bc70git
+- Fix for procd when there is no swap accounting.
+- Allow condor_defrag to cancel draining when it is happy with things.
+
+* Mon May 11 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.5.257bc70git
+- Fix for autofs support.
+
+* Mon Apr 09 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.9.0-0.1.2693346git.1
+- Update to the 7.9.0 branch.
+
+* Fri Feb 10 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.7.5-0.9.3513b55git
+- Fix fd leak for cgroups in the procd.
+
+* Fri Feb 10 2012 Brian Bockelman <bbockelm at cse.unl.edu> - 7.7.5-0.8.3513b55git
+- Enable cgroups for EL6.
+
* Tue Oct 25 2011 <tstclair at redhat.com> - 7.7.3-0.1
- Fast forward to 7.7.3 pre release
diff --git a/condor_config.generic.patch b/condor_config.generic.patch
index a866d41..7f141af 100644
--- a/condor_config.generic.patch
+++ b/condor_config.generic.patch
@@ -158,23 +158,6 @@
## Lock files
SHADOW_LOCK = $(LOCK)/ShadowLock
-@@ -1078,7 +1125,7 @@
- #DC_DAEMON_LIST = \
- #MASTER, STARTD, SCHEDD, KBDD, COLLECTOR, NEGOTIATOR, EVENTD, \
- #VIEW_SERVER, CONDOR_VIEW, VIEW_COLLECTOR, HAWKEYE, CREDD, HAD, \
--#DBMSD, QUILL, JOB_ROUTER, ROOSTER, LEASEMANAGER, HDFS, SHARED_PORT
-+#DBMSD, QUILL, JOB_ROUTER, ROOSTER, LEASEMANAGER, HDFS, SHARED_PORT, TRIGGERD
-
-
- ## Where are the binaries for these daemons?
-@@ -1094,6 +1141,7 @@
- HDFS = $(SBIN)/condor_hdfs
- SHARED_PORT = $(LIBEXEC)/condor_shared_port
- TRANSFERER = $(LIBEXEC)/condor_transferer
-+TRIGGERD = $(sbin)/condor_triggerd
-
- ## When the master starts up, it can place it's address (IP and port)
- ## into a file. This way, tools running on the local machine don't
@@ -1170,11 +1218,13 @@
##--------------------------------------------------------------------
## Address to which Condor will send a weekly e-mail with output of
@@ -195,7 +178,7 @@
## Determine if the Negotiator will honor SlotWeight attributes, which
## may be used to give a slot greater weight when calculating usage.
#NEGOTIATOR_USE_SLOT_WEIGHTS = True
-+NEGOTIATOR_USE_SLOT_WEIGHTS = False
++NEGOTIATOR_USE_SLOT_WEIGHTS = True
## How often the Negotaitor starts a negotiation cycle, defined in
diff --git a/condor_partial_defrag_v2.patch b/condor_partial_defrag_v2.patch
new file mode 100644
index 0000000..d2b0016
--- /dev/null
+++ b/condor_partial_defrag_v2.patch
@@ -0,0 +1,208 @@
+diff --git a/src/condor_daemon_client/dc_startd.cpp b/src/condor_daemon_client/dc_startd.cpp
+index 7261c4a..09a2689 100644
+--- a/src/condor_daemon_client/dc_startd.cpp
++++ b/src/condor_daemon_client/dc_startd.cpp
+@@ -51,7 +51,7 @@ DCStartd::DCStartd( const char* tName, const char* tPool, const char* tAddr,
+ }
+ }
+
+-DCStartd::DCStartd( ClassAd *ad, const char *tPool )
++DCStartd::DCStartd( const ClassAd *ad, const char *tPool )
+ : Daemon(ad,DT_STARTD,tPool),
+ claim_id(NULL)
+ {
+diff --git a/src/condor_daemon_client/dc_startd.h b/src/condor_daemon_client/dc_startd.h
+index c5f3e89..ff20892 100644
+--- a/src/condor_daemon_client/dc_startd.h
++++ b/src/condor_daemon_client/dc_startd.h
+@@ -49,7 +49,7 @@ public:
+ DCStartd( const char* const name, const char* const pool,
+ const char* const addr, const char* const id );
+
+- DCStartd( ClassAd *ad, const char *pool = NULL );
++ DCStartd( const ClassAd *ad, const char *pool = NULL );
+
+ /// Destructor.
+ ~DCStartd();
+diff --git a/src/defrag/defrag.cpp b/src/defrag/defrag.cpp
+index 26aec0a..8710b5d 100644
+--- a/src/defrag/defrag.cpp
++++ b/src/defrag/defrag.cpp
+@@ -185,6 +185,8 @@ void Defrag::config()
+ }
+ }
+
++ m_can_cancel = param_boolean("DEFRAG_CAN_CANCEL", true);
++
+ param(m_defrag_name,"DEFRAG_NAME");
+
+ int stats_quantum = m_polling_interval;
+@@ -487,8 +489,17 @@ void Defrag::poll()
+ int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines);
+ m_stats.WholeMachines = num_whole_machines;
+
++ MachineSet draining_whole_machines;
++ std::stringstream draining_whole_machines_ss;
++ draining_whole_machines_ss << m_whole_machine_expr << " && Draining && Offline=!=True";
++ int num_draining_whole_machines = countMachines(draining_whole_machines_ss.str().c_str(),
++ "<DEFRAG_WHOLE_MACHINE_EXPR Draining>", &draining_whole_machines);
++
+ dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n",
+ num_draining,num_whole_machines);
++ if (num_draining_whole_machines)
++ dprintf(D_ALWAYS, "Of the %d whole machines, %d are in the draining state.\n",
++ num_whole_machines, num_draining_whole_machines);
+
+ queryDrainingCost();
+
+@@ -548,8 +559,7 @@ void Defrag::poll()
+
+ ClassAdList startdAds;
+ std::string requirements;
+- sprintf(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str());
+- if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
++ if( !queryMachines(m_defrag_requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
+ dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n");
+ return;
+ }
+@@ -561,12 +571,26 @@ void Defrag::poll()
+ int num_drained = 0;
+ ClassAd *startd_ad;
+ MachineSet machines_done;
++ MachineSet draining_machines_done;
+ while( (startd_ad=startdAds.Next()) ) {
+ std::string machine;
+ std::string name;
+ startd_ad->LookupString(ATTR_NAME,name);
+ slotNameToDaemonName(name,machine);
+
++ if( !draining_machines_done.count(machine) && draining_whole_machines.count(machine) ) {
++ cancel_drain(*startd_ad);
++ draining_machines_done.insert(machine);
++ continue;
++ }
++
++ // Do not consider slots which are already draining.
++ bool startd_currently_draining = false;
++ startd_ad->LookupBool("Draining", startd_currently_draining);
++ if( startd_currently_draining ) {
++ continue;
++ }
++
+ if( machines_done.count(machine) ) {
+ dprintf(D_FULLDEBUG,
+ "Skipping %s: already attempted to drain %s in this cycle.\n",
+@@ -581,14 +605,13 @@ void Defrag::poll()
+ continue;
+ }
+
+- if( drain(startd_ad) ) {
++ if( (num_drained++ < num_to_drain) && drain(*startd_ad) ) {
+ machines_done.insert(machine);
+
+- if( ++num_drained >= num_to_drain ) {
++ if( num_drained >= num_to_drain ) {
+ dprintf(D_ALWAYS,
+ "Drained maximum number of machines allowed in this cycle (%d).\n",
+ num_to_drain);
+- break;
+ }
+ }
+ }
+@@ -601,26 +624,24 @@ void Defrag::poll()
+ }
+
+ bool
+-Defrag::drain(ClassAd *startd_ad)
++Defrag::drain(const ClassAd &startd_ad)
+ {
+- ASSERT( startd_ad );
+-
+ std::string name;
+- startd_ad->LookupString(ATTR_NAME,name);
++ startd_ad.LookupString(ATTR_NAME,name);
+
+ dprintf(D_ALWAYS,"Initiating %s draining of %s.\n",
+ m_draining_schedule_str.c_str(),name.c_str());
+
+- DCStartd startd( startd_ad );
++ DCStartd startd( &startd_ad );
+
+ int graceful_completion = 0;
+- startd_ad->LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION,graceful_completion);
++ startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_COMPLETION,graceful_completion);
+ int quick_completion = 0;
+- startd_ad->LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION,quick_completion);
++ startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_COMPLETION,quick_completion);
+ int graceful_badput = 0;
+- startd_ad->LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT,graceful_badput);
++ startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_GRACEFUL_DRAINING_BADPUT,graceful_badput);
+ int quick_badput = 0;
+- startd_ad->LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT,quick_badput);
++ startd_ad.LookupInteger(ATTR_EXPECTED_MACHINE_QUICK_DRAINING_BADPUT,quick_badput);
+
+ time_t now = time(NULL);
+ std::string draining_check_expr;
+@@ -659,6 +680,27 @@ Defrag::drain(ClassAd *startd_ad)
+ return true;
+ }
+
++bool
++Defrag::cancel_drain(const ClassAd &startd_ad)
++{
++ // Ask the startd described by startd_ad to stop draining; returns the result of cancelDrainJobs().
++ std::string name;
++ startd_ad.LookupString(ATTR_NAME,name);
++
++ // Announce the cancellation; the "Initiating %s draining" wording belongs to drain().
++ dprintf(D_ALWAYS,"Cancelling draining of %s.\n",name.c_str());
++
++ DCStartd startd( &startd_ad );
++
++ bool rval = startd.cancelDrainJobs( NULL );
++ if ( rval ) {
++ dprintf(D_FULLDEBUG, "Sent request to cancel draining on %s\n", startd.name());
++ } else {
++ dprintf(D_ALWAYS, "Unable to cancel draining on %s: %s\n", startd.name(), startd.error());
++ }
++ return rval;
++}
++
+ void
+ Defrag::publish(ClassAd *ad)
+ {
+diff --git a/src/defrag/defrag.h b/src/defrag/defrag.h
+index 8c7fd51..909b569 100644
+--- a/src/defrag/defrag.h
++++ b/src/defrag/defrag.h
+@@ -40,11 +40,11 @@ class Defrag: public Service {
+ void stop();
+
+ void poll(); // do the periodic policy evaluation
+- bool drain(ClassAd *startd_ad);
+
+ typedef std::set< std::string > MachineSet;
+
+ private:
++
+ int m_polling_interval; // delay between evaluations of the policy
+ int m_polling_timer;
+ double m_draining_per_hour;
+@@ -58,6 +58,7 @@ class Defrag: public Service {
+ ClassAd m_rank_ad;
+ int m_draining_schedule;
+ std::string m_draining_schedule_str;
++ bool m_can_cancel; // Whether condor_defrag can also cancel draining early.
+
+ time_t m_last_poll;
+
+@@ -70,6 +71,9 @@ class Defrag: public Service {
+ ClassAd m_public_ad;
+ DefragStats m_stats;
+
++ bool drain(const ClassAd &startd_ad);
++ bool cancel_drain(const ClassAd &startd_ad);
++
+ void validateExpr(char const *constraint,char const *constraint_source);
+ bool queryMachines(char const *constraint,char const *constraint_source,ClassAdList &startdAds);
+
diff --git a/condor_pid_namespaces_v7.patch b/condor_pid_namespaces_v7.patch
new file mode 100644
index 0000000..810a4b4
--- /dev/null
+++ b/condor_pid_namespaces_v7.patch
@@ -0,0 +1,305 @@
+diff --git a/src/condor_daemon_core.V6/condor_daemon_core.h b/src/condor_daemon_core.V6/condor_daemon_core.h
+index 3562577..d9d1736 100644
+--- a/src/condor_daemon_core.V6/condor_daemon_core.h
++++ b/src/condor_daemon_core.V6/condor_daemon_core.h
+@@ -192,6 +192,7 @@ struct FamilyInfo {
+ gid_t* group_ptr;
+ #endif
+ const char* glexec_proxy;
++ bool want_pid_namespace;
+ const char* cgroup;
+
+ FamilyInfo() {
+@@ -201,6 +202,7 @@ struct FamilyInfo {
+ group_ptr = NULL;
+ #endif
+ glexec_proxy = NULL;
++ want_pid_namespace = false;
+ cgroup = NULL;
+ }
+ };
+diff --git a/src/condor_daemon_core.V6/daemon_core.cpp b/src/condor_daemon_core.V6/daemon_core.cpp
+index e058fd3..74fe8a0 100644
+--- a/src/condor_daemon_core.V6/daemon_core.cpp
++++ b/src/condor_daemon_core.V6/daemon_core.cpp
+@@ -34,6 +34,7 @@
+ #if HAVE_CLONE
+ #include <sched.h>
+ #include <sys/syscall.h>
++#include <sys/mount.h>
+ #endif
+
+ #if HAVE_RESOLV_H && HAVE_DECL_RES_INIT
+@@ -112,6 +113,10 @@ CRITICAL_SECTION Big_fat_mutex; // coarse grained mutex for debugging purposes
+ #include <sched.h>
+ #endif
+
++#if !defined(CLONE_NEWPID)
++#define CLONE_NEWPID 0x20000000
++#endif
++
+ static const char* EMPTY_DESCRIP = "<NULL>";
+
+ // special errno values that may be returned from Create_Process
+@@ -6566,7 +6571,9 @@ public:
+ m_affinity_mask(affinity_mask),
+ m_fs_remap(fs_remap),
+ m_wrote_tracking_gid(false),
+- m_no_dprintf_allowed(false)
++ m_no_dprintf_allowed(false),
++ m_clone_newpid_pid(-1),
++ m_clone_newpid_ppid(-1)
+ {
+ }
+
+@@ -6627,6 +6634,10 @@ private:
+ bool m_wrote_tracking_gid;
+ bool m_no_dprintf_allowed;
+ priv_state m_priv_state;
++ pid_t m_clone_newpid_pid;
++ pid_t m_clone_newpid_ppid;
++
++ pid_t fork(int);
+ };
+
+ enum {
+@@ -6650,7 +6661,19 @@ pid_t CreateProcessForkit::clone_safe_getpid() {
+ // the pid of the parent process (presumably due to internal
+ // caching in libc). Therefore, use the syscall to get
+ // the answer directly.
+- return syscall(SYS_getpid);
++
++ int retval = syscall(SYS_getpid);
++
++ // If we were fork'd with CLONE_NEWPID, we think our PID is 1.
++ // In this case, ask the parent!
++ if (retval == 1) {
++ if (m_clone_newpid_pid == -1) {
++ EXCEPT("getpid is 1!");
++ }
++ retval = m_clone_newpid_pid;
++ }
++
++ return retval;
+ #else
+ return ::getpid();
+ #endif
+@@ -6659,12 +6682,115 @@ pid_t CreateProcessForkit::clone_safe_getppid() {
+ #if HAVE_CLONE
+ // See above comment for clone_safe_getpid() for explanation of
+ // why we need to do this.
+- return syscall(SYS_getppid);
++
++ int retval = syscall(SYS_getppid);
++
++ // If ppid is 0, then either Condor is init (DEAR GOD) or we
++ // were created with CLONE_NEWPID; ask the parent!
++ if (retval == 0) {
++ if (m_clone_newpid_ppid == -1) {
++ EXCEPT("getppid is 0!");
++ }
++ retval = m_clone_newpid_ppid;
++ }
++
++ return retval;
+ #else
+ return ::getppid();
+ #endif
+ }
+
++/**
++ * fork allows one to use certain clone syscall flags, but provides more
++ * familiar POSIX fork semantics.
++ * NOTES:
++ * - We whitelist the flags you are allowed to pass. Currently supported:
++ * - CLONE_NEWPID. Implies CLONE_NEWNS.
++ * If the clone succeeds but the remount fails, the child calls _exit(1),
++ * but the parent will return successfully.
++ * It would be a simple fix to have the parent return the failure, if
++ * someone desired.
++ * Flags are whitelisted to help us adhere to the fork-like semantics (no
++ * shared memory between parent and child, for example). If you give other
++ * flags, they are silently ignored.
++ * - man pages indicate that clone on i386 is only fully functional when used
++ * via ASM, not the vsyscall interface. This doesn't appear to be relevant
++ * to this particular use case.
++ * - To avoid linking with pthreads (or copy/pasting lots of glibc code), I
++ * don't include integration with threads. This means various threading
++ * calls in the child may not function correctly (pre-exec; post-exec
++ * should be fine), and pthreads might not notice when the child exits.
++ * Traditional POSIX calls like wait will still function because the
++ * parent will receive the SIGCHLD.
++ * This is simple to fix if someone desired, but I'd mostly rather not link
++ * with pthreads.
++ */
++
++#define ALLOWED_FLAGS (SIGCHLD | CLONE_NEWPID | CLONE_NEWNS )
++
++pid_t CreateProcessForkit::fork(int flags) {
++
++ // If you don't need any fancy flags, just do the old boring POSIX call
++ if (flags == 0) {
++ return ::fork();
++ }
++
++#if HAVE_CLONE
++
++ int rw[2]; // Communication pipes for the CLONE_NEWPID case.
++
++ flags |= SIGCHLD; // The only necessary flag.
++ if (flags & CLONE_NEWPID) {
++ flags |= CLONE_NEWNS;
++ if (pipe(rw)) {
++ EXCEPT("UNABLE TO CREATE PIPE.");
++ }
++ }
++
++ // fork as root if we have our fancy flags.
++ priv_state orig_state = set_priv(PRIV_ROOT);
++ int retval = syscall(SYS_clone, ALLOWED_FLAGS & flags, 0, NULL, NULL);
++
++ // Restore the caller's priv state on every path (child, parent, and
++ // clone failure) so that nothing is left running as root.
++ set_priv(orig_state);
++
++ // Child: ask the parent for our real PID/PPID.
++ if ((retval == 0) && (flags & CLONE_NEWPID)) {
++ if (full_read(rw[0], &m_clone_newpid_ppid, sizeof(pid_t)) != sizeof(pid_t)) {
++ EXCEPT("Unable to read from pipe.");
++ }
++ if (full_read(rw[0], &m_clone_newpid_pid, sizeof(pid_t)) != sizeof(pid_t)) {
++ EXCEPT("Unable to read from pipe.");
++ }
++
++ // Parent: the pipe only exists in the CLONE_NEWPID case.
++ } else if ((retval > 0) && (flags & CLONE_NEWPID)) {
++ pid_t ppid = getpid(); // We are parent, so don't need clone_safe_pid.
++ pid_t child_pid = retval;
++ if (full_write(rw[1], &ppid, sizeof(ppid)) != sizeof(ppid)) {
++ EXCEPT("Unable to write into pipe.");
++ }
++ if (full_write(rw[1], &child_pid, sizeof(child_pid)) != sizeof(child_pid)) {
++ EXCEPT("Unable to write into pipe.");
++ }
++ }
++ // retval=-1 falls through here.
++ if (flags & CLONE_NEWPID) {
++ close(rw[0]);
++ close(rw[1]);
++ }
++ return retval;
++
++#else
++
++ // Note we silently ignore flags if there's no clone on the platform.
++ return ::fork();
++
++#endif
++
++}
++
+ pid_t CreateProcessForkit::fork_exec() {
+ pid_t newpid;
+
+@@ -6736,7 +6862,11 @@ pid_t CreateProcessForkit::fork_exec() {
+ }
+ #endif /* HAVE_CLONE */
+
+- newpid = fork();
++ int fork_flags = 0;
++ if (m_family_info) {
++ fork_flags |= m_family_info->want_pid_namespace ? CLONE_NEWPID : 0;
++ }
++ newpid = this->fork(fork_flags);
+ if( newpid == 0 ) {
+ // in child
+ enterCreateProcessChild(this);
+diff --git a/src/condor_starter.V6.1/vanilla_proc.cpp b/src/condor_starter.V6.1/vanilla_proc.cpp
+index 044cb10..8528ca7 100644
+--- a/src/condor_starter.V6.1/vanilla_proc.cpp
++++ b/src/condor_starter.V6.1/vanilla_proc.cpp
+@@ -360,6 +360,24 @@ VanillaProc::StartJob()
+ }
+ }
+
++#if defined(LINUX)
++ // On Linux kernel 2.6.24 and later, we can give each
++ // job its own PID namespace
++ if (param_boolean("USE_PID_NAMESPACES", false)) {
++ if (!can_switch_ids()) {
++ EXCEPT("USE_PID_NAMESPACES enabled, but can't perform this "
++ "call in Linux unless running as root.");
++ }
++ fi.want_pid_namespace = true;
++ if (!fs_remap) {
++ fs_remap = new FilesystemRemap();
++ }
++ fs_remap->RemapProc();
++ }
++ dprintf(D_FULLDEBUG, "PID namespace option: %s\n", fi.want_pid_namespace ? "true" : "false");
++#endif
++
++
+ // have OsProc start the job
+ //
+ int retval = OsProc::StartJob(&fi, fs_remap);
+diff --git a/src/condor_utils/filesystem_remap.cpp b/src/condor_utils/filesystem_remap.cpp
+index e0f2e61..735c744 100644
+--- a/src/condor_utils/filesystem_remap.cpp
++++ b/src/condor_utils/filesystem_remap.cpp
+@@ -29,7 +29,8 @@
+
+ FilesystemRemap::FilesystemRemap() :
+ m_mappings(),
+- m_mounts_shared()
++ m_mounts_shared(),
++ m_remap_proc(false)
+ {
+ ParseMountinfo();
+ }
+@@ -120,6 +121,9 @@ int FilesystemRemap::PerformMappings() {
+ break;
+ }
+ }
++ if ((!retval) && m_remap_proc) {
++ retval = mount("proc", "/proc", "proc", 0, NULL);
++ }
+ #endif
+ return retval;
+ }
+@@ -148,6 +152,10 @@ std::string FilesystemRemap::RemapDir(std::string target) {
+ return target;
+ }
+
++void FilesystemRemap::RemapProc() {
++ m_remap_proc = true;
++}
++
+ /*
+ Sample mountinfo contents (from http://www.kernel.org/doc/Documentation/filesystems/proc.txt):
+ 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+diff --git a/src/condor_utils/filesystem_remap.h b/src/condor_utils/filesystem_remap.h
+index 5e9362d..2e17476 100644
+--- a/src/condor_utils/filesystem_remap.h
++++ b/src/condor_utils/filesystem_remap.h
+@@ -74,6 +74,12 @@ public:
+ */
+ std::string RemapFile(std::string);
+
++ /**
++ * Indicate that we should remount /proc in the child process.
++ * Necessary for PID namespaces.
++ */
++ void RemapProc();
++
+ private:
+
+ /**
+@@ -89,6 +95,7 @@ private:
+ std::list<pair_strings> m_mappings;
+ std::list<pair_str_bool> m_mounts_shared;
+ std::list<pair_strings> m_mounts_autofs;
++ bool m_remap_proc;
+
+ };
+ #endif
diff --git a/hcc_config.patch b/hcc_config.patch
index ce54bcd..9009867 100644
--- a/hcc_config.patch
+++ b/hcc_config.patch
@@ -5,20 +5,7 @@
## This is where the local config file(s), logs and
## spool/execute directories are located
-LOCAL_DIR = $(TILDE)
-+LOCAL_DIR = /var
++LOCAL_DIR = /var/lib/condor
#LOCAL_DIR = $(RELEASE_DIR)/hosts/$(HOSTNAME)
## Looking for LOCAL_CONFIG_FILE? You will not find it here. Instead
-@@ -969,9 +969,10 @@
- ######################################################################
-
- ## Pathnames
-+#LOCAL_CONFIG_FILE = $(LOCAL_DIR)/lib/condor/condor_config.local
- LOG = /var/log/condor
--SPOOL = $(LOCAL_DIR)/spool
--EXECUTE = $(LOCAL_DIR)/execute
-+SPOOL = $(LOCAL_DIR)/lib/condor/spool
-+EXECUTE = $(LOCAL_DIR)/lib/condor/execute
- BIN = $(RELEASE_DIR)/bin
- LIB = $(RELEASE_DIR)/lib
- INCLUDE = $(RELEASE_DIR)/include/condor
diff --git a/wso2-axis2.patch b/wso2-axis2.patch
new file mode 100644
index 0000000..7f9d177
--- /dev/null
+++ b/wso2-axis2.patch
@@ -0,0 +1,11 @@
+--- src/condor_contrib/aviary/src/Axis2SslProvider.cpp.orig 2012-01-08 20:31:54.000000000 -0600
++++ src/condor_contrib/aviary/src/Axis2SslProvider.cpp 2012-01-08 20:31:58.000000000 -0600
+@@ -131,7 +131,7 @@
+ axutil_stream_set_read(&(stream_impl->stream), m_env, axis2_ssl_stream_read);
+ axutil_stream_set_write(&(stream_impl->stream), m_env, axis2_ssl_stream_write);
+ axutil_stream_set_skip(&(stream_impl->stream), m_env, axis2_ssl_stream_skip);
+- axutil_stream_set_peek(&(stream_impl->stream), m_env, axis2_ssl_stream_peek);
++ //axutil_stream_set_peek(&(stream_impl->stream), m_env, axis2_ssl_stream_peek);
+
+ svr_conn->stream = &(stream_impl->stream);
+
More information about the scm-commits
mailing list