From c999057b55d57775d795dfa498ba180c67000233 Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Fri, 5 Jul 2019 13:31:32 -0400
Subject: [PATCH 1/3] Replace replication_wait_timeout with
 certmonger_wait_timeout

The variable is intended to control the timeout for replication
events. If someone had significantly reduced it via configuration
then it could have caused certmonger requests to fail due to timeouts.

Add replication_wait_timeout, certmonger_wait_timeout and
http_timeout to the default.conf man page.

Related: https://pagure.io/freeipa/issue/7971
---
 client/man/default.conf.5         | 9 +++++++++
 ipalib/constants.py               | 2 ++
 ipalib/install/certmonger.py      | 2 +-
 ipaserver/install/cainstance.py   | 2 +-
 ipaserver/install/certs.py        | 2 +-
 ipaserver/install/dsinstance.py   | 2 +-
 ipaserver/install/httpinstance.py | 2 +-
 ipaserver/install/krbinstance.py  | 2 +-
 pylint_plugins.py                 | 1 +
 9 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/client/man/default.conf.5 b/client/man/default.conf.5
index f21d9d5b7a..728fc08717 100644
--- a/client/man/default.conf.5
+++ b/client/man/default.conf.5
@@ -77,6 +77,9 @@ Specifies the hostname of the dogtag CA server. The default is the hostname of t
 .B ca_port <port>
 Specifies the insecure CA end user port. The default is 8080.
 .TP
+.B certmonger_wait_timeout <seconds>
+The time to wait for a certmonger request to complete during installation. The default value is 300 seconds.
+.TP
 .B context <context>
 Specifies the context that IPA is being executed in. IPA may operate differently depending on the context. The current defined contexts are cli and server. Additionally this value is used to load /etc/ipa/\fBcontext\fR.conf to provide context\-specific configuration. For example, if you want to always perform client requests in verbose mode but do not want to have verbose enabled on the server, add the verbose option to \fI/etc/ipa/cli.conf\fR.
 .TP
@@ -98,6 +101,9 @@ Specifies whether an IPA client should attempt to fall back and try other servic
 .B host <hostname>
 Specifies the local system hostname.
 .TP
+.B http_timeout <seconds>
+Timeout for HTTP blocking requests (e.g. connection). The default value is 30 seconds.
+.TP
 .B in_server <boolean>
 Specifies whether requests should be forwarded to an IPA server or handled locally. This is used internally by IPA in a similar way as context. The same IPA framework is used by the ipa command\-line tool and the server. This setting tells the framework whether it should execute the command as if on the server or forward it via XML\-RPC to a remote server.
 .TP
@@ -160,6 +166,9 @@ Specifies the name of the CA back end to use. The current options are \fBdogtag\
 .B realm <realm>
 Specifies the Kerberos realm.
 .TP
+.B replication_wait_timeout <seconds>
+The time to wait for a new entry to be replicated during replica installation. The default value is 300 seconds.
+.TP
 .B server <hostname>
 Specifies the IPA Server hostname.
 .TP
diff --git a/ipalib/constants.py b/ipalib/constants.py
index d4577d668f..85a8179495 100644
--- a/ipalib/constants.py
+++ b/ipalib/constants.py
@@ -173,6 +173,8 @@
     ('http_timeout', 30),
     # How long to wait for an entry to appear on a replica
     ('replication_wait_timeout', 300),
+    # How long to wait for a certmonger request to finish
+    ('certmonger_wait_timeout', 300),
 
     # Web Application mount points
     ('mount_ipa', '/ipa/'),
diff --git a/ipalib/install/certmonger.py b/ipalib/install/certmonger.py
index 8d4170cc04..5e3c98a736 100644
--- a/ipalib/install/certmonger.py
+++ b/ipalib/install/certmonger.py
@@ -326,7 +326,7 @@ def request_and_wait_for_cert(
 
     deadline = time.time() + resubmit_timeout
     while True:  # until success, timeout, or error
-        state = wait_for_request(req_id, api.env.replication_wait_timeout)
+        state = wait_for_request(req_id, api.env.certmonger_wait_timeout)
         ca_error = get_request_value(req_id, 'ca-error')
         if state == 'MONITORING' and ca_error is None:
             # we got a winner, exiting
diff --git a/ipaserver/install/cainstance.py b/ipaserver/install/cainstance.py
index 6e1fc724db..0cd75cd8a7 100644
--- a/ipaserver/install/cainstance.py
+++ b/ipaserver/install/cainstance.py
@@ -868,7 +868,7 @@ def __request_ra_certificate(self):
                 pre_command='renew_ra_cert_pre',
                 post_command='renew_ra_cert',
                 storage="FILE",
-                resubmit_timeout=api.env.replication_wait_timeout
+                resubmit_timeout=api.env.certmonger_wait_timeout
             )
             self.__set_ra_cert_perms()
 
diff --git a/ipaserver/install/certs.py b/ipaserver/install/certs.py
index 9c46bc324c..7064652ba6 100644
--- a/ipaserver/install/certs.py
+++ b/ipaserver/install/certs.py
@@ -661,7 +661,7 @@ def export_pem_cert(self, nickname, location):
     def request_service_cert(self, nickname, principal, host,
                              resubmit_timeout=None):
         if resubmit_timeout is None:
-            resubmit_timeout = api.env.replication_wait_timeout
+            resubmit_timeout = api.env.certmonger_wait_timeout
         return certmonger.request_and_wait_for_cert(
             certpath=self.secdir,
             storage='NSSDB',
diff --git a/ipaserver/install/dsinstance.py b/ipaserver/install/dsinstance.py
index 81cd912c2f..84cebf2f5c 100644
--- a/ipaserver/install/dsinstance.py
+++ b/ipaserver/install/dsinstance.py
@@ -872,7 +872,7 @@ def __enable_ssl(self):
                     profile=dogtag.DEFAULT_PROFILE,
                     dns=[self.fqdn],
                     post_command=cmd,
-                    resubmit_timeout=api.env.replication_wait_timeout
+                    resubmit_timeout=api.env.certmonger_wait_timeout
                 )
             finally:
                 if prev_helper is not None:
diff --git a/ipaserver/install/httpinstance.py b/ipaserver/install/httpinstance.py
index 63d3021255..c3e7f49839 100644
--- a/ipaserver/install/httpinstance.py
+++ b/ipaserver/install/httpinstance.py
@@ -385,7 +385,7 @@ def __setup_ssl(self):
                     post_command='restart_httpd',
                     storage='FILE',
                     passwd_fname=key_passwd_file,
-                    resubmit_timeout=api.env.replication_wait_timeout
+                    resubmit_timeout=api.env.certmonger_wait_timeout
                 )
             finally:
                 if prev_helper is not None:
diff --git a/ipaserver/install/krbinstance.py b/ipaserver/install/krbinstance.py
index 437e469fa2..823723c556 100644
--- a/ipaserver/install/krbinstance.py
+++ b/ipaserver/install/krbinstance.py
@@ -459,7 +459,7 @@ def _call_certmonger(self, certmonger_ca='IPA'):
                 profile=KDC_PROFILE,
                 post_command='renew_kdc_cert',
                 perms=(0o644, 0o600),
-                resubmit_timeout=api.env.replication_wait_timeout
+                resubmit_timeout=api.env.certmonger_wait_timeout
             )
         except dbus.DBusException as e:
             # if the certificate is already tracked, ignore the error
diff --git a/pylint_plugins.py b/pylint_plugins.py
index 6e7526f8e8..1f980e7aac 100644
--- a/pylint_plugins.py
+++ b/pylint_plugins.py
@@ -432,6 +432,7 @@ def wildcard(*args, **kwargs):
     api.env.ca_host = ''
     api.env.ca_install_port = None
     api.env.ca_port = 0
+    api.env.certmonger_wait_timeout = 0
     api.env.conf = ''  # object
     api.env.conf_default = ''  # object
     api.env.confdir = ''  # object

From f9d12761397f89beb6c50886329ad54404b0501e Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Fri, 5 Jul 2019 14:14:53 -0400
Subject: [PATCH 2/3] Log the replication wait timeout for debugging purposes

Related: https://pagure.io/freeipa/issue/7971
---
 ipaserver/install/replication.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ipaserver/install/replication.py b/ipaserver/install/replication.py
index 77146b0b6d..6b27c5a22b 100644
--- a/ipaserver/install/replication.py
+++ b/ipaserver/install/replication.py
@@ -186,7 +186,8 @@ def wait_for_entry(connection, dn, timeout, attr=None, attrvalue='*',
         attrlist.append(attr)
     else:
         filterstr = "(objectclass=*)"
-    log("Waiting for replication (%s) %s %s", connection, dn, filterstr)
+    log("Waiting up to %s seconds for replication (%s) %s %s",
+        timeout, connection, dn, filterstr)
     entry = []
     deadline = time.time() + timeout
     for i in itertools.count(start=1):

From 8d9f4126078fb5bf39ddacf9f28872b2b42f4f92 Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Fri, 5 Jul 2019 14:15:32 -0400
Subject: [PATCH 3/3] Log dogtag auth timeout in install, provide hint to
 increase it

There is a loop which keeps trying to bind as the admin user
which will fail until it is replicated.

In the case where there is a lot to replicate the default
5 minute timeout may be insufficient. Provide a hint for
tuning.

Fixes: https://pagure.io/freeipa/issue/7971
---
 ipaserver/install/dogtaginstance.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ipaserver/install/dogtaginstance.py b/ipaserver/install/dogtaginstance.py
index cc75d89746..1ceab0dc08 100644
--- a/ipaserver/install/dogtaginstance.py
+++ b/ipaserver/install/dogtaginstance.py
@@ -497,7 +497,8 @@ def setup_admin(self):
             self.master_host
         )
         logger.debug(
-            "Waiting for %s to appear on %s", self.admin_dn, master_conn
+            "Waiting %s seconds for %s to appear on %s",
+            api.env.replication_wait_timeout, self.admin_dn, master_conn
         )
         deadline = time.time() + api.env.replication_wait_timeout
         while time.time() < deadline:
@@ -514,6 +515,10 @@ def setup_admin(self):
             logger.error(
                 "Unable to log in as %s on %s", self.admin_dn, master_conn
             )
+            if time.time() > deadline:
+                logger.debug(
+                    "[hint] tune with replication_wait_timeout"
+                )
             raise errors.NotFound(
                 reason="{} did not replicate to {}".format(
                     self.admin_dn, master_conn
