ldap/servers/plugins/replication/repl5_replica_config.c | 31 +++++++++++-----
ldap/servers/plugins/replication/repl5_ruv.c | 29 ++++++++++++++
ldap/servers/plugins/replication/repl5_ruv.h | 1
ldap/servers/plugins/replication/repl_extop.c | 4 --
4 files changed, 54 insertions(+), 11 deletions(-)
New commits:
commit 0968b3f5d5551dc75e00c9d6e9528cc062f451fa
Author: Mark Reynolds <mreynolds(a)redhat.com>
Date: Mon Aug 20 12:17:51 2012 -0400
Ticket 403 - fix CLEANALLRUV regression from last commit
Bug Description: The last commit for CLEANALLRUV caused a regression that could
hang the process when comparing maxcsn's.
Fix Description: Needed to write a new ruv_compare_csn function to handle the
unique scenarios presented by CLEANALLRUV.
Also improved settings verification and error handling.
https://fedorahosted.org/389/ticket/403
Reviewed by: ?
diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c
index b408809..e222746 100644
--- a/ldap/servers/plugins/replication/repl5_replica_config.c
+++ b/ldap/servers/plugins/replication/repl5_replica_config.c
@@ -1461,19 +1461,32 @@ replica_cleanallruv_thread(void *arg)
if(data->replica == NULL && data->repl_obj == NULL){
/*
* This thread was initiated at startup because the process did not finish. Due
- * to timing issues, we need to wait to grab the replica obj until we get here.
+ * to startup timing issues, we need to wait before grabbing the replica obj, as
+ * the backends might not be online yet.
*/
+ PR_Lock( notify_lock );
+ PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(5) );
+ PR_Unlock( notify_lock );
data->repl_obj = replica_get_replica_from_dn(data->sdn);
+ if(data->repl_obj == NULL){
+ cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to retrieve repl object from dn(%s).", data->sdn);
+ aborted = 1;
+ goto done;
+ }
data->replica = (Replica*)object_get_data(data->repl_obj);
free_obj = 1;
- }
- if(data->replica == NULL && data->repl_obj){
+ } else if(data->replica == NULL && data->repl_obj){
data->replica = (Replica*)object_get_data(data->repl_obj);
- }
- if( data->repl_obj == NULL){
+ } else if( data->repl_obj == NULL && data->replica){
data->repl_obj = object_new(data->replica, NULL);
free_obj = 1;
}
+ /* verify we have set our repl objects */
+ if(data->repl_obj == NULL || data->replica == NULL){
+ cleanruv_log(data->task, CLEANALLRUV_ID, "Unable to set the replica objects.");
+ aborted = 1;
+ goto done;
+ }
if(data->task){
slapi_task_begin(data->task, 1);
}
@@ -1492,11 +1505,13 @@ replica_cleanallruv_thread(void *arg)
ruv_obj = replica_get_ruv(data->replica);
ruv = object_get_data (ruv_obj);
while(data->maxcsn && !is_task_aborted(data->rid) && !is_cleaned_rid(data->rid) && !slapi_is_shutting_down()){
- if(csn_get_replicaid(data->maxcsn) == 0 || ruv_covers_csn_strict(ruv,data->maxcsn)){
+ if(csn_get_replicaid(data->maxcsn) == 0 || ruv_covers_csn_cleanallruv(ruv,data->maxcsn)){
/* We are caught up, now we can clean the ruv's */
break;
}
- DS_Sleep(PR_SecondsToInterval(5));
+ PR_Lock( notify_lock );
+ PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(5) );
+ PR_Unlock( notify_lock );
}
object_release(ruv_obj);
/*
@@ -1963,7 +1978,7 @@ add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn)
repl_get_plugin_identity (PLUGIN_MULTIMASTER_REPLICATION), 0);
slapi_modify_internal_pb (pb);
slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc);
- if (rc != LDAP_SUCCESS){
+ if (rc != LDAP_SUCCESS && rc != LDAP_TYPE_OR_VALUE_EXISTS){
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "CleanAllRUV Task: failed to update replica "
"config (%d), rid (%d)\n", rc, rid);
}
diff --git a/ldap/servers/plugins/replication/repl5_ruv.c b/ldap/servers/plugins/replication/repl5_ruv.c
index fb8ea09..b52dd49 100644
--- a/ldap/servers/plugins/replication/repl5_ruv.c
+++ b/ldap/servers/plugins/replication/repl5_ruv.c
@@ -920,6 +920,35 @@ ruv_covers_csn_strict(const RUV *ruv, const CSN *csn)
return rc;
}
+/*
+ * Used by the cleanallruv task
+ *
+ * We want to return TRUE if replica is NULL,
+ * and we want to use "csn_compare() <="
+ */
+PRBool
+ruv_covers_csn_cleanallruv(const RUV *ruv, const CSN *csn)
+{
+ RUVElement *replica;
+ ReplicaId rid;
+ PRBool return_value;
+
+ if (ruv == NULL || csn == NULL){
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, "ruv_covers_csn_cleanallruv: NULL argument\n");
+ return_value = PR_FALSE;
+ } else {
+ rid = csn_get_replicaid(csn);
+ replica = ruvGetReplica (ruv, rid);
+ if (replica == NULL){
+ /* already cleaned */
+ return_value = PR_TRUE;
+ } else {
+ return_value = (csn_compare (csn, replica->csn) <= 0);
+ }
+ }
+
+ return return_value;
+}
/*
* The function gets min{maxcsns of all ruv elements} if get_the_max=0,
diff --git a/ldap/servers/plugins/replication/repl5_ruv.h b/ldap/servers/plugins/replication/repl5_ruv.h
index f2d96f3..944f5ed 100644
--- a/ldap/servers/plugins/replication/repl5_ruv.h
+++ b/ldap/servers/plugins/replication/repl5_ruv.h
@@ -115,6 +115,7 @@ void ruv_set_replica_generation (RUV *ruv, const char *generation);
PRBool ruv_covers_ruv(const RUV *covering_ruv, const RUV *covered_ruv);
PRBool ruv_covers_csn(const RUV *ruv, const CSN *csn);
PRBool ruv_covers_csn_strict(const RUV *ruv, const CSN *csn);
+PRBool ruv_covers_csn_cleanallruv(const RUV *ruv, const CSN *csn);
int ruv_get_min_csn(const RUV *ruv, CSN **csn);
int ruv_get_max_csn(const RUV *ruv, CSN **csn);
int ruv_get_rid_max_csn(const RUV *ruv, CSN **csn, ReplicaId rid);
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
index ecf1f93..334b86b 100644
--- a/ldap/servers/plugins/replication/repl_extop.c
+++ b/ldap/servers/plugins/replication/repl_extop.c
@@ -1541,8 +1541,6 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
csn_free(&maxcsn);
rc = 1;
goto free_and_return;
- } else {
- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "cleanAllRUV_task: cleaning rid (%d)...\n", rid);
}
/*
@@ -1618,7 +1616,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb)
break;
}
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "cleanAllRUV_task: checking if we're caught up...\n");
- if(ruv_covers_csn_strict(ruv,maxcsn) || csn_get_replicaid(maxcsn) == 0){
+ if(ruv_covers_csn_cleanallruv(ruv,maxcsn) || csn_get_replicaid(maxcsn) == 0){
/* We are caught up */
break;
} else {