This is an automated email from the git hooks/post-receive script.
tbordaz pushed a change to branch 389-ds-base-1.3.6 in repository 389-ds-base.
from 26f37b8 Ticket 49509 - Indexing of internationalized matching rules is failing new f1373f0 Ticket 49463 - After cleanALLruv, there is a flow of keep alive DEL
The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference.
Summary of changes: ldap/servers/plugins/replication/repl5.h | 50 +++++++++++----------- ldap/servers/plugins/replication/repl5_replica.c | 23 +++++++++- .../plugins/replication/repl5_replica_config.c | 32 +++++++++++--- ldap/servers/plugins/replication/repl_extop.c | 2 + 4 files changed, 77 insertions(+), 30 deletions(-)
This is an automated email from the git hooks/post-receive script.
tbordaz pushed a commit to branch 389-ds-base-1.3.6 in repository 389-ds-base.
commit f1373f03306b8d8fde0017f7439abae8775415cb Author: Thierry Bordaz tbordaz@redhat.com Date: Fri Jan 12 16:29:23 2018 +0100
Ticket 49463 - After cleanALLruv, there is a flow of keep alive DEL
Bug Description: When cleanAllRuv is launched, it spawns a cleanAllRuv task on all replicas. Each replica cleans its changelog and database RUV and, in addition, DELs the keep alive entry of the target ReplicaID. So for the same entry (keep alive) there will be as many DELs as there are replicas.
This flow of DELs is useless, as a single DEL is enough. In addition, because of https://pagure.io/389-ds-base/issue/49466, replication may loop on each of those DELs.
Fix Description: The fix only prevents the flow of DELs. It adds a flag ('original_task') to the task payload. The server receiving the task (replica_execute_cleanall_ruv_task) flags the task as 'original_task'. Conversely, the propagated cleanAllRuv (multimaster_extop_cleanruv) does not flag the task as 'original_task'. Only the original task does the DEL of the keep alive entry. Note that the propagated payload (extop) is not changed. In a mixed-version environment, "old" servers will still DEL the keep alive entry, so the flow can still happen.
https://pagure.io/389-ds-base/issue/49463
Reviewed by: Ludwig Krispenz
Platforms tested: F23
Flag Day: no
Doc impact: no --- ldap/servers/plugins/replication/repl5.h | 50 +++++++++++----------- ldap/servers/plugins/replication/repl5_replica.c | 23 +++++++++- .../plugins/replication/repl5_replica_config.c | 32 +++++++++++--- ldap/servers/plugins/replication/repl_extop.c | 2 + 4 files changed, 77 insertions(+), 30 deletions(-)
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 9c4789f..5dd392e 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -680,12 +680,37 @@ void multimaster_mtnode_construct_replicas(void);
void multimaster_be_state_change (void *handle, char *be_name, int old_be_state, int new_be_state);
+#define CLEANRIDSIZ 64 /* maximum number for concurrent CLEANALLRUV tasks */ + +typedef struct _cleanruv_data +{ + Object *repl_obj; + Replica *replica; + ReplicaId rid; + Slapi_Task *task; + struct berval *payload; + CSN *maxcsn; + char *repl_root; + Slapi_DN *sdn; + char *certify; + char *force; + PRBool original_task; +} cleanruv_data; + +typedef struct _cleanruv_purge_data +{ + int cleaned_rid; + const Slapi_DN *suffix_sdn; + char *replName; + char *replGen; +} cleanruv_purge_data; + /* In repl5_replica_config.c */ int replica_config_init(void); void replica_config_destroy(void); int get_replica_type(Replica *r); int replica_execute_cleanruv_task_ext(Object *r, ReplicaId rid); -void add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing); +void add_cleaned_rid(cleanruv_data *data, char *maxcsn); int is_cleaned_rid(ReplicaId rid); int replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, int *returncode, char *returntext, void *arg); @@ -706,29 +731,6 @@ void set_cleaned_rid(ReplicaId rid); void cleanruv_log(Slapi_Task *task, int rid, char *task_type, int sev_level, char *fmt, ...); char * replica_cleanallruv_get_local_maxcsn(ReplicaId rid, char *base_dn);
-#define CLEANRIDSIZ 64 /* maximum number for concurrent CLEANALLRUV tasks */ - -typedef struct _cleanruv_data -{ - Object *repl_obj; - Replica *replica; - ReplicaId rid; - Slapi_Task *task; - struct berval *payload; - CSN *maxcsn; - char *repl_root; - Slapi_DN *sdn; - char *certify; - char *force; -} cleanruv_data; - -typedef struct _cleanruv_purge_data -{ - int cleaned_rid; - const Slapi_DN *suffix_sdn; - char *replName; - char *replGen; -} cleanruv_purge_data;
/* replutil.c */ LDAPControl* create_managedsait_control(void); diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c index c6a3135..dfa9d93 100644 --- a/ldap/servers/plugins/replication/repl5_replica.c +++ b/ldap/servers/plugins/replication/repl5_replica.c @@ -2298,6 +2298,7 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e) char csnstr[CSN_STRSIZE]; char *token = NULL; char *forcing; + PRBool original_task; char *csnpart; char *ridstr; char *iter = NULL; @@ -2327,8 +2328,15 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e) csn_init_by_string(maxcsn, csnpart); csn_as_string(maxcsn, PR_FALSE, csnstr); forcing = ldap_utf8strtok_r(iter, ":", &iter); - if(forcing == NULL){ + original_task = PR_TRUE; + if (forcing == NULL){ forcing = "no"; + } else if (!strcasecmp(forcing, "yes") || !strcasecmp(forcing, "no")) { + /* forcing was correctly set, lets try to read the original task flag */ + token = ldap_utf8strtok_r(iter, ":", &iter); + if (token && !atoi(token)) { + original_task = PR_FALSE; + } }
slapi_log_err(SLAPI_LOG_NOTICE, repl_plugin_name, "CleanAllRUV Task - cleanAllRUV task found, " @@ -2365,6 +2373,13 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e) data->force = slapi_ch_strdup(forcing); data->repl_root = NULL;
+ /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash + * We retrieved from type_replicaCleanRUV if the cleanAllRuv request + * was received from a direct task ADD or if was received via + * the cleanAllRuv extop. + */ + data->original_task = original_task; + thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); @@ -2455,6 +2470,12 @@ done: data->sdn = slapi_sdn_dup(r->repl_root); data->certify = slapi_ch_strdup(certify);
+ /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash + * Let's assum this replica was the original receiver of the task. + * This flag has no impact on Abort cleanAllRuv + */ + data->original_task = PR_TRUE; + thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index 1b103a2..d30a35c 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1725,6 +1725,11 @@ replica_execute_cleanall_ruv_task (Object *r, ReplicaId rid, Slapi_Task *task, c data->repl_root = slapi_ch_strdup(basedn); data->force = slapi_ch_strdup(force_cleaning);
+ /* It is either a consequence of a direct ADD cleanAllRuv task + * or modify of the replica to add nsds5task: cleanAllRuv + */ + data->original_task = PR_TRUE; + thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); @@ -1848,7 +1853,7 @@ replica_cleanallruv_thread(void *arg) /* * Add the cleanallruv task to the repl config - so we can handle restarts */ - add_cleaned_rid(data->rid, data->replica, csnstr, data->force); /* marks config that we started cleaning a rid */ + add_cleaned_rid(data, csnstr); /* marks config that we started cleaning a rid */ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Cleaning rid (%d)...", data->rid); /* * First, wait for the maxcsn to be covered @@ -2025,7 +2030,13 @@ done: */ delete_cleaned_rid_config(data); check_replicas_are_done_cleaning(data); - remove_keep_alive_entry(data->task, data->rid, data->repl_root); + if (data->original_task) { + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Original task deletes Keep alive entry (%d).", data->rid); + remove_keep_alive_entry(data->task, data->rid, data->repl_root); + } else { + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Propagated task does not delete Keep alive entry (%d).", data->rid); + } + clean_agmts(data); remove_cleaned_rid(data->rid); cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Successfully cleaned rid(%d).", data->rid); @@ -2176,7 +2187,7 @@ check_replicas_are_done_cleaning(cleanruv_data *data ) "Waiting for all the replicas to finish cleaning...");
csn_as_string(data->maxcsn, PR_FALSE, csnstr); - filter = PR_smprintf("(%s=%d:%s:%s)", type_replicaCleanRUV,(int)data->rid, csnstr, data->force); + filter = PR_smprintf("(%s=%d:%s:%s:%d)", type_replicaCleanRUV, (int)data->rid, csnstr, data->force, data->original_task ? 1 : 0); while(not_all_cleaned && !is_task_aborted(data->rid) && !slapi_is_shutting_down()){ agmt_obj = agmtlist_get_first_agreement_for_replica (data->replica); if(agmt_obj == NULL){ @@ -2650,7 +2661,7 @@ set_cleaned_rid(ReplicaId rid) * Add the rid and maxcsn to the repl config (so we can resume after a server restart) */ void -add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing) +add_cleaned_rid(cleanruv_data *cleanruv_data, char *maxcsn) { Slapi_PBlock *pb; struct berval *vals[2]; @@ -2660,6 +2671,16 @@ add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing) char data[CSN_STRSIZE + 10]; char *dn; int rc; + ReplicaId rid; + Replica *r; + char *forcing; + + if (data == NULL) { + return; + } + rid = cleanruv_data->rid; + r = cleanruv_data->replica; + forcing = cleanruv_data->force;
if(r == NULL || maxcsn == NULL){ return; @@ -2667,7 +2688,7 @@ add_cleaned_rid(ReplicaId rid, Replica *r, char *maxcsn, char *forcing) /* * Write the rid & maxcsn to the config entry */ - val.bv_len = PR_snprintf(data, sizeof(data),"%d:%s:%s", rid, maxcsn, forcing); + val.bv_len = PR_snprintf(data, sizeof(data), "%d:%s:%s:%d", rid, maxcsn, forcing, cleanruv_data->original_task ? 1 : 0); dn = replica_get_dn(r); pb = slapi_pblock_new(); mod.mod_op = LDAP_MOD_ADD|LDAP_MOD_BVALUES; @@ -3099,6 +3120,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb __attribute__((unused)), data->repl_root = slapi_ch_strdup(base_dn); data->sdn = NULL; data->certify = slapi_ch_strdup(certify_all); + data->original_task = PR_TRUE;
thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c index 814effb..a91047b 100644 --- a/ldap/servers/plugins/replication/repl_extop.c +++ b/ldap/servers/plugins/replication/repl_extop.c @@ -1561,6 +1561,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) data->rid = rid; data->repl_root = slapi_ch_strdup(repl_root); data->certify = slapi_ch_strdup(certify_all); + data->original_task = PR_FALSE; /* * Set the aborted rid and stop the cleaning */ @@ -1702,6 +1703,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb) data->payload = slapi_ch_bvdup(extop_payload); data->force = slapi_ch_strdup(force); data->repl_root = slapi_ch_strdup(repl_root); + data->original_task = PR_FALSE;
thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
389-commits@lists.fedoraproject.org