ldap/servers/plugins/replication/repl5_replica.c | 44 ++---- ldap/servers/plugins/replication/repl5_ruv.c | 147 +++++++++++++++++++---- ldap/servers/plugins/replication/repl5_ruv.h | 16 ++ 3 files changed, 158 insertions(+), 49 deletions(-)
New commits: commit 6bac1a7876ddd2a1fe986505f16aa0330ab4a671 Author: Rich Megginson rmeggins@redhat.com Date: Thu Sep 8 20:03:14 2011 -0600
Bug 590826 - Reloading database from ldif causes changelog to emit "data no longer matches" errors
https://bugzilla.redhat.com/show_bug.cgi?id=590826 Resolves: bug 590826 Bug Description: Reloading database from ldif causes changelog to emit "data no longer matches" errors Reviewed by: nkinder, nhosoi (Thanks!) Branch: master Fix Description: When there are obsolete or decommissioned masters in the RUV, this should not invalidate the changelog. Instead, warn the user that there are replicas in the database RUV that are not in the changelog max RUV. In this case, the CLEANRUV task can be used to remove the obsolete masters from the database RUV. I had to add a function to generate a string representation of the replica RUVElement* for logging purposes, and used that function elsewhere. The new function ruv_compare_ruv should be used instead of ruv_covers_ruv since it gives more flexibility about logging and handling different cases. Platforms tested: RHEL6 x86_64 Flag Day: no Doc impact: no
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c index b2ebf93..92322ee 100644 --- a/ldap/servers/plugins/replication/repl5_replica.c +++ b/ldap/servers/plugins/replication/repl5_replica.c @@ -1431,7 +1431,6 @@ int replica_check_for_data_reload (Replica *r, void *arg) RUV *upper_bound_ruv = NULL; RUV *r_ruv = NULL; Object *r_obj, *ruv_obj; - int cl_cover_be, be_cover_cl;
PR_ASSERT (r);
@@ -1452,6 +1451,7 @@ int replica_check_for_data_reload (Replica *r, void *arg)
if (upper_bound_ruv) { + char ebuf[BUFSIZ]; ruv_obj = replica_get_ruv (r); r_ruv = object_get_data (ruv_obj); PR_ASSERT (r_ruv); @@ -1474,40 +1474,19 @@ int replica_check_for_data_reload (Replica *r, void *arg) * sessions. */
- be_cover_cl = ruv_covers_ruv (r_ruv, upper_bound_ruv); - cl_cover_be = ruv_covers_ruv (upper_bound_ruv, r_ruv); - if (!cl_cover_be) + rc = ruv_compare_ruv(upper_bound_ruv, "changelog max RUV", r_ruv, "database RUV", 0, SLAPI_LOG_FATAL); + if (RUV_COMP_IS_FATAL(rc)) { - /* the data was reloaded, or we had disorderly shutdown between - * writing RUV and CL, and we can no longer use existing CL */ - char ebuf[BUFSIZ]; - char cl_csn_str[CSN_STRSIZE] = {0}; - char be_csn_str[CSN_STRSIZE] = {0}; - CSN *cl_csn = NULL; - CSN *be_csn = NULL; - - if (ruv_get_max_csn( r_ruv, &be_csn ) == RUV_SUCCESS) { - csn_as_string( be_csn, PR_FALSE, be_csn_str ); - csn_free( &be_csn ); - } - - if (ruv_get_max_csn( upper_bound_ruv, &cl_csn ) == RUV_SUCCESS) { - csn_as_string( cl_csn, PR_FALSE, cl_csn_str ); - csn_free( &cl_csn ); - } - /* create a temporary replica object to conform to the interface */ r_obj = object_new (r, NULL);
/* We can't use existing changelog - remove existing file */ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: " - "Warning: data for replica %s does not match the data in the changelog " - "(replica data (%s) %s changelog (%s)). Recreating the changelog file. " + "Warning: data for replica %s does not match the data in the changelog. " + "Recreating the changelog file. " "This could affect replication with replica's consumers in which case the " "consumers should be reinitialized.\n", - escape_string(slapi_sdn_get_dn(r->repl_root),ebuf), - (*be_csn_str=='\0' ? "unknown" : be_csn_str), - ((!be_cover_cl) ? "<>" : ">"), (*cl_csn_str=='\0' ? "unknown" : cl_csn_str)); + escape_string(slapi_sdn_get_dn(r->repl_root),ebuf));
rc = cl5DeleteDBSync (r_obj);
@@ -1519,6 +1498,17 @@ int replica_check_for_data_reload (Replica *r, void *arg) rc = replica_log_ruv_elements (r); } } + else if (rc) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: " + "Warning: for replica %s there were some differences between the changelog max RUV and the " + "database RUV. If there are obsolete elements in the database RUV, you " + "should remove them using CLEANRUV task. If they are not obsolete, " + "you should check their status to see why there are no changes from those " + "servers in the changelog.\n", + escape_string(slapi_sdn_get_dn(r->repl_root),ebuf)); + rc = 0; + }
object_release (ruv_obj); } diff --git a/ldap/servers/plugins/replication/repl5_ruv.c b/ldap/servers/plugins/replication/repl5_ruv.c index ce0da4d..2e4af87 100644 --- a/ldap/servers/plugins/replication/repl5_ruv.c +++ b/ldap/servers/plugins/replication/repl5_ruv.c @@ -1006,6 +1006,34 @@ ruv_enumerate_elements (const RUV *ruv, FNEnumRUV fn, void *arg) return rc; }
+void +ruv_element_to_string(RUVElement *ruvelem, struct berval *bv, char *buf, size_t bufsize) +{ + char csnStr1[CSN_STRSIZE]; + char csnStr2[CSN_STRSIZE]; + const char *fmtstr = "%s%d%s%s}%s%s%s%s"; + if (buf && bufsize) { + PR_snprintf(buf, bufsize, fmtstr, + prefix_ruvcsn, ruvelem->rid, + ruvelem->replica_purl == NULL ? "" : " ", + ruvelem->replica_purl == NULL ? "" : ruvelem->replica_purl, + ruvelem->min_csn == NULL ? "" : " ", + ruvelem->min_csn == NULL ? "" : csn_as_string (ruvelem->min_csn, PR_FALSE, csnStr1), + ruvelem->csn == NULL ? "" : " ", + ruvelem->csn == NULL ? "" : csn_as_string (ruvelem->csn, PR_FALSE, csnStr2)); + } else { + bv->bv_val = slapi_ch_smprintf(fmtstr, + prefix_ruvcsn, ruvelem->rid, + ruvelem->replica_purl == NULL ? "" : " ", + ruvelem->replica_purl == NULL ? "" : ruvelem->replica_purl, + ruvelem->min_csn == NULL ? "" : " ", + ruvelem->min_csn == NULL ? "" : csn_as_string (ruvelem->min_csn, PR_FALSE, csnStr1), + ruvelem->csn == NULL ? "" : " ", + ruvelem->csn == NULL ? "" : csn_as_string (ruvelem->csn, PR_FALSE, csnStr2)); + bv->bv_len = strlen(bv->bv_val); + } +} + /* * Convert a replica update vector to a NULL-terminated array * of bervals. The caller is responsible for freeing the bervals. @@ -1025,8 +1053,6 @@ ruv_to_bervals(const RUV *ruv, struct berval ***bvals) int count; int i; RUVElement *replica; - char csnStr1 [CSN_STRSIZE]; - char csnStr2 [CSN_STRSIZE]; int cookie; slapi_rwlock_rdlock (ruv->lock); count = dl_get_count (ruv->elements) + 2; @@ -1040,15 +1066,7 @@ ruv_to_bervals(const RUV *ruv, struct berval ***bvals) i++, replica = dl_get_next (ruv->elements, &cookie)) { returned_bervals[i] = (struct berval *)slapi_ch_malloc(sizeof(struct berval)); - returned_bervals[i]->bv_val = slapi_ch_smprintf("%s%d%s%s}%s%s%s%s", - prefix_ruvcsn, replica->rid, - replica->replica_purl == NULL ? "" : " ", - replica->replica_purl == NULL ? "" : replica->replica_purl, - replica->min_csn == NULL ? "" : " ", - replica->min_csn == NULL ? 
"" : csn_as_string (replica->min_csn, PR_FALSE, csnStr1), - replica->csn == NULL ? "" : " ", - replica->csn == NULL ? "" : csn_as_string (replica->csn, PR_FALSE, csnStr2)); - returned_bervals[i]->bv_len = strlen(returned_bervals[i]->bv_val); + ruv_element_to_string(replica, returned_bervals[i], NULL, 0); } slapi_rwlock_unlock (ruv->lock); return_value = RUV_SUCCESS; @@ -1072,29 +1090,21 @@ ruv_to_smod(const RUV *ruv, Slapi_Mod *smod) struct berval val; RUVElement *replica; int cookie; - char csnStr1 [CSN_STRSIZE]; - char csnStr2 [CSN_STRSIZE]; #define B_SIZ 1024 char buf[B_SIZ]; slapi_rwlock_rdlock (ruv->lock); slapi_mod_init (smod, dl_get_count (ruv->elements) + 1); slapi_mod_set_type (smod, type_ruvElement); slapi_mod_set_operation (smod, LDAP_MOD_REPLACE | LDAP_MOD_BVALUES); - PR_snprintf(buf, B_SIZ, "%s %s", prefix_replicageneration, ruv->replGen); + PR_snprintf(buf, sizeof(buf), "%s %s", prefix_replicageneration, ruv->replGen); val.bv_val = buf; val.bv_len = strlen(buf); slapi_mod_add_value(smod, &val); for (replica = dl_get_first (ruv->elements, &cookie); replica; replica = dl_get_next (ruv->elements, &cookie)) { - - PR_snprintf(buf, B_SIZ, "%s%d%s%s}%s%s%s%s", prefix_ruvcsn, replica->rid, - replica->replica_purl == NULL ? "" : " ", - replica->replica_purl == NULL ? "" : replica->replica_purl, - replica->min_csn == NULL ? "" : " ", - replica->min_csn == NULL ? "" : csn_as_string (replica->min_csn, PR_FALSE, csnStr1), - replica->csn == NULL ? "" : " ", - replica->csn == NULL ? "" : csn_as_string (replica->csn, PR_FALSE, csnStr2)); + ruv_element_to_string(replica, NULL, buf, sizeof(buf)); + val.bv_val = buf; val.bv_len = strlen(buf); slapi_mod_add_value(smod, &val); } @@ -1172,6 +1182,99 @@ ruv_covers_ruv(const RUV *covering_ruv, const RUV *covered_ruv) return return_value; }
+/* + * This compares two ruvs to see if they are compatible. This is + * used, for example, when the data is reloaded, to see if the ruv + * from the database is compatible with the ruv from the changelog. + * If the replica generation is empty or does not match, the data + * is not compatible. + * If the maxcsns are not compatible, the ruvs are not compatible. + * However, if the first ruv has replica IDs that the second RUV + * does not have, and this is the only difference, the application + * may allow that with a warning. + */ +int +ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const char *ruv2name, int strict, int loglevel) +{ + int rc = 0; + int ii = 0; + const RUV *ruvalist[] = {ruv1, ruv2}; + const RUV *ruvblist[] = {ruv2, ruv1}; + int missinglist[2] = {0, 0}; + const char *ruvanames[] = {ruv1name, ruv2name}; + const char *ruvbnames[] = {ruv2name, ruv1name}; + const int nitems = 2; + + /* compare replica generations first */ + if (ruv1->replGen == NULL || ruv2->replGen == NULL) { + slapi_log_error(loglevel, repl_plugin_name, + "ruv_compare_ruv: RUV [%s] is missing the replica generation\n", + ruv1->replGen ? 
ruv2name : ruv1name); + return RUV_COMP_NO_GENERATION; + } + + if (strcasecmp (ruv1->replGen, ruv2->replGen)) { + slapi_log_error(loglevel, repl_plugin_name, + "ruv_compare_ruv: RUV [%s] replica generation [%s] does not match RUV [%s] [%s]\n", + ruv1name, ruv1->replGen, ruv2name, ruv2->replGen); + return RUV_COMP_GENERATION_DIFFERS; + } + + /* replica generation is the same, now compare element by element */ + for (ii = 0; ii < nitems; ++ii) { + int cookie; + const RUV *ruva = ruvalist[ii]; + const RUV *ruvb = ruvblist[ii]; + int *missing = &missinglist[ii]; + const char *ruvaname = ruvanames[ii]; + const char *ruvbname = ruvbnames[ii]; + RUVElement *replicab; + + for (replicab = dl_get_first (ruvb->elements, &cookie); + NULL != replicab; + replicab = dl_get_next (ruvb->elements, &cookie)) { + if (replicab->csn) { + ReplicaId rid = csn_get_replicaid(replicab->csn); + RUVElement *replicaa = ruvGetReplica(ruva, rid); + char csnstra[CSN_STRSIZE]; + char csnstrb[CSN_STRSIZE]; + char ruvelem[1024]; + ruv_element_to_string(replicab, NULL, ruvelem, sizeof(ruvelem)); + csn_as_string(replicab->csn, PR_FALSE, csnstrb); + if (replicaa == NULL) { + (*missing)++; + slapi_log_error(loglevel, repl_plugin_name, + "ruv_compare_ruv: RUV [%s] does not contain element [%s] " + "which is present in RUV [%s]\n", + ruvaname, ruvelem, ruvbname); + } else if (strict && (csn_compare (replicab->csn, replicaa->csn) >= 0)) { + csn_as_string(replicaa->csn, PR_FALSE, csnstra); + slapi_log_error(loglevel, repl_plugin_name, + "ruv_compare_ruv: the max CSN [%s] from RUV [%s] is larger " + "than or equal to the max CSN [%s] from RUV [%s] for element [%s]\n", + csnstrb, ruvbname, csnstra, ruvaname, ruvelem); + rc = RUV_COMP_CSN_DIFFERS; + } else if (csn_compare (replicab->csn, replicaa->csn) > 0) { + csn_as_string(replicaa->csn, PR_FALSE, csnstra); + slapi_log_error(loglevel, repl_plugin_name, + "ruv_compare_ruv: the max CSN [%s] from RUV [%s] is larger " + "than the max CSN [%s] from RUV [%s] for 
element [%s]\n", + csnstrb, ruvbname, csnstra, ruvaname, ruvelem); + rc = RUV_COMP_CSN_DIFFERS; + } + } + } + } + if (!rc) { + if (missinglist[0]) { + rc = RUV_COMP_RUV1_MISSING; + } else if (missinglist[1]) { + rc = RUV_COMP_RUV2_MISSING; + } + } + return rc; +} + PRInt32 ruv_replica_count (const RUV *ruv) { diff --git a/ldap/servers/plugins/replication/repl5_ruv.h b/ldap/servers/plugins/replication/repl5_ruv.h index 0a0777e..d329dc3 100644 --- a/ldap/servers/plugins/replication/repl5_ruv.h +++ b/ldap/servers/plugins/replication/repl5_ruv.h @@ -69,6 +69,21 @@ enum RUV_COVERS_CSN };
+/* return values from ruv_compare_ruv */ +enum +{ + RUV_COMP_SUCCESS=0, + RUV_COMP_NO_GENERATION, /* one or both of the RUVs is missing the replica generation */ + RUV_COMP_GENERATION_DIFFERS, /* the RUVs have different replica generations */ + /* one of the maxcsns in one of the RUVs is out of date with respect to the + corresponding maxcsn in the corresponding replica in the other RUV */ + RUV_COMP_CSN_DIFFERS, + RUV_COMP_RUV1_MISSING, /* ruv2 contains replicas not in ruv1 - CLEANRUV */ + RUV_COMP_RUV2_MISSING /* ruv1 contains replicas not in ruv2 */ +}; + +#define RUV_COMP_IS_FATAL(ruvcomp) (ruvcomp && (ruvcomp < RUV_COMP_RUV1_MISSING)) + typedef struct ruv_enum_data { CSN *csn; @@ -122,6 +137,7 @@ PRBool ruv_has_both_csns(const RUV *ruv); PRBool ruv_is_newer (Object *sruv, Object *cruv); void ruv_force_csn_update (RUV *ruv, CSN *csn); void ruv_insert_dummy_min_csn (RUV *ruv); +int ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const char *ruv2name, int strict, int loglevel); #ifdef __cplusplus } #endif
389-commits@lists.fedoraproject.org