modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java | 124 +++++++--- modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java | 87 +++++-- 2 files changed, 153 insertions(+), 58 deletions(-)
New commits: commit fdc1cabee3ea72a167f71971a7d4c8f324b8e8db Author: Jay Shaughnessy jshaughn@redhat.com Date: Fri Oct 25 17:47:23 2013 -0400
Bug 1022620 - Windows 2008 - Upgrade to JON3.2.ER3 fails when using Oracle jdk7-32b. This is the second commit for this bug, and it goes one step further. Now, for both the server and the agent, we create/set java.io.tmpdir to InstallDir/temp if the original java.io.tmpdir is invalid. We'll generate a warning about the original setting. We'll only exit if our attempt to create/set a local temp dir fails. This helps us succeed out of the box with the known issues using the 32-bit JVM on Windows, as well as getting around the issue in general.
diff --git a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java index 77c88b1..c99f4b6 100644 --- a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java +++ b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java @@ -368,7 +368,7 @@ public class AgentMain { private VMHealthCheckThread m_vmHealthCheckThread;
/** - * Counts the number of times the agent has been restarted and holds the reason for the last restart. + * Counts the number of times the agent has been restarted and holds the reason for the last restart. */ private final AgentRestartCounter m_agentRestartCounter = new AgentRestartCounter();
@@ -378,7 +378,7 @@ public class AgentMain { private boolean m_disableNativeSystem;
/** - * Thread used to repeatedly ping the server for connectivity, agent avail update, and clock sync + * Thread used to repeatedly ping the server for connectivity, agent avail update, and clock sync */ private ScheduledThreadPoolExecutor m_pingThreadPoolExecutor;
@@ -465,6 +465,62 @@ public class AgentMain { return; }
+ private void checkTempDir() { + File tmpDir = new File(System.getProperty("java.io.tmpdir")); + if (!tmpDir.exists()) { + LOG.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] does not exist."); + useLocalTmpDir(); + return; + } + if (!tmpDir.isDirectory()) { + LOG.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not a directory"); + useLocalTmpDir(); + return; + } + if (!tmpDir.canRead() || !tmpDir.canExecute()) { + LOG.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not readable"); + useLocalTmpDir(); + return; + } + if (!tmpDir.canWrite()) { + LOG.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not writable"); + useLocalTmpDir(); + return; + } + } + + private void useLocalTmpDir() { + File localTmpDir = null; + try { + localTmpDir = new File(new File(getAgentHomeDirectory()), "temp"); + LOG.info("Using alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]"); + if (!localTmpDir.exists()) { + LOG.info("Creating alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]"); + localTmpDir.mkdir(); + } + System.setProperty("java.io.tmpdir", localTmpDir.getAbsolutePath()); + } catch (Throwable t) { + throw new RuntimeException("Startup failed: Could not create or set local java.io.tmpdir [" + + localTmpDir.getAbsolutePath() + "]", t); + } + if (!localTmpDir.exists()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] does not exist"); + } + if (!localTmpDir.isDirectory()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] is not a directory"); + } + if (!localTmpDir.canRead() || !localTmpDir.canExecute()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] is not readable"); + } + if (!localTmpDir.canWrite()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + 
localTmpDir.getAbsolutePath() + + "] is not writable"); + } + } + /** * Constructor for {@link AgentMain} that loads the agent configuration and prepare some additional internal data. * @@ -517,13 +573,15 @@ public class AgentMain {
prepareNativeSystem();
+ checkTempDir(); + return; }
/** * Returns the directory that is considered the "agent home" (i.e. the directory * where the agent is installed). - * + * * @return agent home directory, or empty string if it cannot be determined */ public String getAgentHomeDirectory() { @@ -614,7 +672,7 @@ public class AgentMain { /** * This method should be called whenever the server time is known. This helps * keep the {@link #getAgentServerClockDifference()} up-to-date. - * + * * @param serverTime the currently know value of the server clock (epoch millis) */ public void serverClockNotification(long serverTime) { @@ -920,7 +978,7 @@ public class AgentMain { /** * This will enable/disable agent-server communication tracing. This is for * use mainly in development but can also be used for troubleshooting problems. - * + * * @param enabled whether or not to turn on agent comm tracing */ public void agentServerCommunicationsTrace(boolean enabled) { @@ -952,7 +1010,7 @@ public class AgentMain { * This will hot-deploy a new log4j log configuration file. Use this to change, at runtime, * the log settings so you can, for example, begin logging DEBUG messages to help troubleshoot * problems. - * + * * @param logFilePath the path to the log file - relative to the classloader or filesystem * * @throws Exception if failed to hot deploy the new log config @@ -985,7 +1043,7 @@ public class AgentMain { /** * Returns an iteratable list of servers that can be used as backups when this agent needs to failover * to another server. - * + * * @return list of servers (may be empty but will not be <code>null</code>) */ public FailoverListComposite getServerFailoverList() { @@ -1029,8 +1087,8 @@ public class AgentMain { /** * Downloads a new server failover list from the server and returns the failover list * that is now in effect. 
- * @return - * + * @return + * * @return the server failover list that is now in effect */ public FailoverListComposite downloadServerFailoverList() { @@ -1568,7 +1626,7 @@ public class AgentMain { * @param wait_ms maximum number of milliseconds to wait * * @return <code>true</code> if the server is up, <code>false</code> if it is not yet up or the agent has shutdown - * + * * @throws AgentNotSupportedException If the server is up but it told us we are the wrong version, then this is thrown. * When this is thrown, the agent is currently in the midst of updating itself. */ @@ -1940,9 +1998,9 @@ public class AgentMain { * port and transport parameters being used to talk to the current server * will stay the same. Otherwise, it will be assumed the server is a * full endpoint URL. - * + * * @param server the host of the server to switch to, or a full server endpoint URL - * + * * @return <code>true</code> if successfully switched, <code>false</code> otherwise */ public boolean switchToServer(String server) { @@ -1996,9 +2054,9 @@ public class AgentMain {
/** * Switches the agent to talk to the next server in the failover list. - * + * * This is package-scoped so the failover callback can call this. - * + * * @param comm the communicator object whose endpoint needs to be switched to the next server * the caller must ensure the remote communicator provided to this method is the * same communicator used by this agent's {@link #getClientCommandSender() sender}. @@ -2036,12 +2094,12 @@ public class AgentMain {
/** * Immediately switches the given communicator to the given server. - * + * * @param comm the communicator whose server is switched * @param newServer the endpoint of the new server * @param transport the transport that should be used in the new remote endpoint URL * @param transportParams the transport params that should be used in the new remote endpoint URL - * + * * @return <code>true</code> if successfully switched; <code>false</code> otherwise */ private boolean switchCommServer(RemoteCommunicator comm, ServerEntry newServer, String transport, @@ -2096,12 +2154,12 @@ public class AgentMain { * is making the server its primary server and will begin sending it messages. The request * is sent such that the communicator's initialize callback will never be invoked, however, * the caller can ask for the request to attempt failover. - * + * * <p>This is package scoped so the initialize callback can call this</p> - * + * * @param comm the communicator used to send the message to the server * @param attemptFailover if <code>true</code>, and the connect command fails, server failover will be attempted - * + * * @throws Throwable */ void sendConnectRequestToServer(RemoteCommunicator comm, boolean attemptFailover) throws Throwable { @@ -2203,7 +2261,7 @@ public class AgentMain {
/** * Returns the agent restart counter object. - * + * * @return the agent restart counter */ public AgentRestartCounter getAgentRestartCounter() { @@ -2351,7 +2409,7 @@ public class AgentMain { m_commServices.start(m_configuration.getPreferences(), m_configuration.getClientCommandSenderConfiguration());
// prime the sender so it can be prepared to start sending messages. - // if auto-discovery is enabled, then the auto-discovery listener will tell the sender when its OK to start + // if auto-discovery is enabled, then the auto-discovery listener will tell the sender when its OK to start // sending. Otherwise start polling and let the poller tell the sender when it is ok to start sending. if (!isAutoDiscoveryEnabled()) { LOG.info(AgentI18NResourceKeys.NO_AUTO_DETECT); @@ -2402,7 +2460,7 @@ public class AgentMain {
/** * This will prepare the auto-discovery listener, if server auto-detection is enabled. - * + * * @throws Exception */ private void prepareAutoDiscoveryListener() throws Exception { @@ -2638,17 +2696,17 @@ public class AgentMain {
/** * Creates a raw remote communicator that can talk to the given endpoint. - * + * * This is public-scoped so the {@link PrimaryServerSwitchoverThread} can use this * and the {@link IdentifyPromptCommand} can use this. - * + * * @param transport * @param address * @param port * @param transportParams - * + * * @return the remote communicator - * + * * @throws Exception if the communicator could not be created */ public RemoteCommunicator createServerRemoteCommunicator(String transport, String address, int port, @@ -2741,7 +2799,7 @@ public class AgentMain { * Given a failover list, this makes very rudimentary connection attempts to each server to see if * this agent can at least reach the server endpoints. If an endpoint cannot be reached, * a warning is logged. - * + * * @param failoverList the list of servers this agent will potentially need to talk to. * @return the servers that failed to be connected to */ @@ -2804,7 +2862,7 @@ public class AgentMain { * Given a failover list, this will persist it so the agent can recover it if the agent itself fails. * If this method fails to persist the list, an error is logged but otherwise this method * returns normally. - * + * * @param failoverList the failover list to persist (may be <code>null</code>) */ private void storeServerFailoverList(FailoverListComposite failoverList) { @@ -3184,7 +3242,7 @@ public class AgentMain { }
if (m_daemonMode) { - AgentInputReaderFactory.setConsoleType(AgentInputReaderFactory.ConsoleType.java); // don't use native libs, no need and jline causes problems + AgentInputReaderFactory.setConsoleType(AgentInputReaderFactory.ConsoleType.java); // don't use native libs, no need and jline causes problems } else if (console_type != null) { AgentInputReaderFactory.setConsoleType(console_type); } @@ -3373,7 +3431,7 @@ public class AgentMain { return args.toArray(new String[args.size()]); }
- // perform any other massaging + // perform any other massaging private String safeArg(String arg) { // remove trailing '=' from long option args. For example --plugin= should just be --plugin for // downstream processing. @@ -3642,7 +3700,7 @@ public class AgentMain { * </ol> * By restarting the plugin container in such conditions, we essentially re-run the resource upgrade * and let the plugin container try to re-merge with the server that we know has just connected. - * + * * @author Lukas Krejci */ private class PluginContainerConditionalRestartListener implements ClientCommandSenderStateListener { @@ -3750,7 +3808,7 @@ public class AgentMain { try { // if we can't send to the server ignore the ping if (!m_clientSender.isSending()) { - // An unlikely state, but if we're not sending, not polling and not performing autoDiscovery + // An unlikely state, but if we're not sending, not polling and not performing autoDiscovery // (multicast), then start polling to we eventually get out of this state. if (!(m_clientSender.isServerPolling() || isAutoDiscoveryEnabled())) { LOG.info(AgentI18NResourceKeys.PING_EXECUTOR_STARTING_POLLING); @@ -3760,7 +3818,7 @@ public class AgentMain { return; }
- // we are in sending mode, so make sure the poller is off + // we are in sending mode, so make sure the poller is off if (m_clientSender.isServerPolling()) { LOG.info(AgentI18NResourceKeys.PING_EXECUTOR_STOPPING_POLLING_RESUME_PING); m_clientSender.stopServerPolling(); diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java index d5878c6..941ae93 100644 --- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java +++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java @@ -236,20 +236,56 @@ public class StartupBean implements StartupLocal { private void checkTempDir() { File tmpDir = new File(System.getProperty("java.io.tmpdir")); if (!tmpDir.exists()) { - throw new RuntimeException("Startup failed: java.io.tmpdir '" + tmpDir.getAbsolutePath() - + "' does not exist"); + log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] does not exist."); + useLocalTmpDir(); + return; } if (!tmpDir.isDirectory()) { - throw new RuntimeException("Startup failed: java.io.tmpdir '" + tmpDir.getAbsolutePath() - + "' is not a directory"); + log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not a directory"); + useLocalTmpDir(); + return; } if (!tmpDir.canRead() || !tmpDir.canExecute()) { - throw new RuntimeException("Startup failed: java.io.tmpdir '" + tmpDir.getAbsolutePath() - + "' is not readable"); + log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not readable"); + useLocalTmpDir(); + return; } if (!tmpDir.canWrite()) { - throw new RuntimeException("Startup failed: java.io.tmpdir '" + tmpDir.getAbsolutePath() - + "' is not writable"); + log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() + "] is not writable"); + useLocalTmpDir(); + return; + } + } + + private void useLocalTmpDir() { + File 
localTmpDir = null; + try { + localTmpDir = new File(LookupUtil.getCoreServer().getInstallDir(), "temp"); + log.info("Using alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]"); + if (!localTmpDir.exists()) { + log.info("Creating alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]"); + localTmpDir.mkdir(); + } + System.setProperty("java.io.tmpdir", localTmpDir.getAbsolutePath()); + } catch (Throwable t) { + throw new RuntimeException("Startup failed: Could not create or set local java.io.tmpdir [" + + localTmpDir.getAbsolutePath() + "]", t); + } + if (!localTmpDir.exists()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] does not exist"); + } + if (!localTmpDir.isDirectory()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] is not a directory"); + } + if (!localTmpDir.canRead() || !localTmpDir.canExecute()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] is not readable"); + } + if (!localTmpDir.canWrite()) { + throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath() + + "] is not writable"); } }
@@ -546,19 +582,19 @@ public class StartupBean implements StartupLocal { } }
- /** - * This seeds the agent clients cache with clients for all known agents. These clients will be started so they can - * immediately begin to send any persisted guaranteed messages that might already exist. This method must be called - * at a time when the server is ready to accept messages from agents because any guaranteed messages that are - * delivered might trigger the agents to send messages back to the server. - * - * NOTE: we don't need to do this - so far, none of the messages the server sends to the agent are marked - * with "guaranteed delivery" (this is on purpose and a good thing) so we don't need to start all the agent clients - * in case they have persisted messages. Since the number of agents could be large this cache could be huge and - * take some time to initialize. If we don't call this, it speeds up start up, and doesn't bloat memory with - * clients we might not ever need (since agents might have affinity to other servers). Agent clients - * can be created lazily at runtime when the server needs it. - */ + /** + * This seeds the agent clients cache with clients for all known agents. These clients will be started so they can + * immediately begin to send any persisted guaranteed messages that might already exist. This method must be called + * at a time when the server is ready to accept messages from agents because any guaranteed messages that are + * delivered might trigger the agents to send messages back to the server. + * + * NOTE: we don't need to do this - so far, none of the messages the server sends to the agent are marked + * with "guaranteed delivery" (this is on purpose and a good thing) so we don't need to start all the agent clients + * in case they have persisted messages. Since the number of agents could be large this cache could be huge and + * take some time to initialize. 
If we don't call this, it speeds up start up, and doesn't bloat memory with + * clients we might not ever need (since agents might have affinity to other servers). Agent clients + * can be created lazily at runtime when the server needs it. + */ private void startAgentClients() { log.info("Starting agent clients - any persisted messages with guaranteed delivery will be sent...");
@@ -598,7 +634,7 @@ public class StartupBean implements StartupLocal { final long initialDelay = 1000L * 60; final long interval = 1000L * 60; schedulerBean.scheduleSimpleRepeatingJob(SavedSearchResultCountRecalculationJob.class, true, false, - initialDelay, interval); + initialDelay, interval); } catch (Exception e) { log.error("Cannot schedule asynchronous resource deletion job.", e); } @@ -636,7 +672,7 @@ public class StartupBean implements StartupLocal { final long initialDelay = 1000L * 60; final long interval = 1000L * 60; schedulerBean.scheduleSimpleRepeatingJob(DynaGroupAutoRecalculationJob.class, true, false, initialDelay, - interval); + interval); } catch (Exception e) { log.error("Cannot schedule DynaGroup auto-recalculation job.", e); } @@ -738,9 +774,9 @@ public class StartupBean implements StartupLocal { log.error("Cannot create storage cluster init job", e); } } - + try { - String cronString = "0 30 0 ? * SUN *"; // every sunday starting at 00:30. + String cronString = "0 30 0 ? * SUN *"; // every sunday starting at 00:30. schedulerBean.scheduleSimpleCronJob(StorageClusterReadRepairJob.class, true, true, cronString); } catch (Exception e) { log.error("Cannot create storage cluster read repair job", e); @@ -754,6 +790,7 @@ public class StartupBean implements StartupLocal { * * @deprecated we don't have an embedded agent anymore, leaving this in case we resurrect it */ + @Deprecated private void startEmbeddedAgent() throws RuntimeException { // we can't use EmbeddedAgentBootstrapServiceMBean because if the embedded agent // isn't installed, that class will not be available; we must use JMX API
rhq-commits@lists.fedorahosted.org