dirsrvtests/tickets/ticket47871_test.py | 306 ++++++++++++++++++++++++++++ ldap/servers/plugins/retrocl/retrocl.h | 2 ldap/servers/plugins/retrocl/retrocl_trim.c | 163 +++++++------- 3 files changed, 393 insertions(+), 78 deletions(-)
New commits: commit 495151b7308e257c3813842f8b1828447fe1c5dd Author: Thierry bordaz (tbordaz) tbordaz@redhat.com Date: Thu Aug 21 11:49:18 2014 +0200
Ticket 47871 - 389-ds-base-1.3.2.21-1.fc20 crashed over the weekend
Bug Description: Retro-changelog trimming uses a slapi_search_internal_callback_pb to check if the oldest record needs to be trim. The callback function handle_getchangerecord_search stores the entry in the callback structure. Later it evaluates the entry to check its changetime. This is not valid, as the entry is only valid inside the callback.
Fix Description: The callback function 'handle_getchangetime_search' evaluate the changetime and returns it in the callback struct.
https://fedorahosted.org/389/ticket/47871
Reviewed by: Noriko Hosoi (Thanks Noriko !)
Platforms tested: F17/F20
Flag Day: no
Doc impact: no
diff --git a/dirsrvtests/tickets/ticket47871_test.py b/dirsrvtests/tickets/ticket47871_test.py new file mode 100644 index 0000000..1d1351b --- /dev/null +++ b/dirsrvtests/tickets/ticket47871_test.py @@ -0,0 +1,306 @@ +''' +Created on Nov 7, 2013 + +@author: tbordaz +''' +import os +import sys +import time +import ldap +import logging +import socket +import time +import logging +import pytest +import re +from lib389 import DirSrv, Entry, tools +from lib389.tools import DirSrvTools +from lib389._constants import * +from lib389.properties import * +from constants import * + +logging.getLogger(__name__).setLevel(logging.DEBUG) +log = logging.getLogger(__name__) + +installation_prefix = None + +TEST_REPL_DN = "cn=test_repl, %s" % SUFFIX +ENTRY_DN = "cn=test_entry, %s" % SUFFIX + +OTHER_NAME = 'other_entry' +MAX_OTHERS = 10 + +ATTRIBUTES = [ 'street', 'countryName', 'description', 'postalAddress', 'postalCode', 'title', 'l', 'roomNumber' ] + +class TopologyMasterConsumer(object): + def __init__(self, master, consumer): + master.open() + self.master = master + + consumer.open() + self.consumer = consumer + + def __repr__(self): + return "Master[%s] -> Consumer[%s" % (self.master, self.consumer) + + +@pytest.fixture(scope="module") +def topology(request): + ''' + This fixture is used to create a replicated topology for the 'module'. + The replicated topology is MASTER -> Consumer. + At the beginning, It may exists a master instance and/or a consumer instance. + It may also exists a backup for the master and/or the consumer. + + Principle: + If master instance exists: + restart it + If consumer instance exists: + restart it + If backup of master AND backup of consumer exists: + create or rebind to consumer + create or rebind to master + + restore master from backup + restore consumer from backup + else: + Cleanup everything + remove instances + remove backups + Create instances + Initialize replication + Create backups + ''' + global installation_prefix + + if installation_prefix: + args_instance[SER_DEPLOYED_DIR] = installation_prefix + + master = DirSrv(verbose=False) + consumer = DirSrv(verbose=False) + + # Args for the master instance + args_instance[SER_HOST] = HOST_MASTER + args_instance[SER_PORT] = PORT_MASTER + args_instance[SER_SERVERID_PROP] = SERVERID_MASTER + args_master = args_instance.copy() + master.allocate(args_master) + + # Args for the consumer instance + args_instance[SER_HOST] = HOST_CONSUMER + args_instance[SER_PORT] = PORT_CONSUMER + args_instance[SER_SERVERID_PROP] = SERVERID_CONSUMER + args_consumer = args_instance.copy() + consumer.allocate(args_consumer) + + + # Get the status of the backups + backup_master = master.checkBackupFS() + backup_consumer = consumer.checkBackupFS() + + # Get the status of the instance and restart it if it exists + instance_master = master.exists() + if instance_master: + master.stop(timeout=10) + master.start(timeout=10) + + instance_consumer = consumer.exists() + if instance_consumer: + consumer.stop(timeout=10) + consumer.start(timeout=10) + + if backup_master and backup_consumer: + # The backups exist, assuming they are correct + # we just re-init the instances with them + if not instance_master: + master.create() + # Used to retrieve configuration information (dbdir, confdir...) + master.open() + + if not instance_consumer: + consumer.create() + # Used to retrieve configuration information (dbdir, confdir...) + consumer.open() + + # restore master from backup + master.stop(timeout=10) + master.restoreFS(backup_master) + master.start(timeout=10) + + # restore consumer from backup + consumer.stop(timeout=10) + consumer.restoreFS(backup_consumer) + consumer.start(timeout=10) + else: + # We should be here only in two conditions + # - This is the first time a test involve master-consumer + # so we need to create everything + # - Something weird happened (instance/backup destroyed) + # so we discard everything and recreate all + + # Remove all the backups. So even if we have a specific backup file + # (e.g backup_master) we clear all backups that an instance my have created + if backup_master: + master.clearBackupFS() + if backup_consumer: + consumer.clearBackupFS() + + # Remove all the instances + if instance_master: + master.delete() + if instance_consumer: + consumer.delete() + + # Create the instances + master.create() + master.open() + consumer.create() + consumer.open() + + # + # Now prepare the Master-Consumer topology + # + # First Enable replication + master.replica.enableReplication(suffix=SUFFIX, role=REPLICAROLE_MASTER, replicaId=REPLICAID_MASTER) + consumer.replica.enableReplication(suffix=SUFFIX, role=REPLICAROLE_CONSUMER) + + # Initialize the supplier->consumer + + properties = {RA_NAME: r'meTo_$host:$port', + RA_BINDDN: defaultProperties[REPLICATION_BIND_DN], + RA_BINDPW: defaultProperties[REPLICATION_BIND_PW], + RA_METHOD: defaultProperties[REPLICATION_BIND_METHOD], + RA_TRANSPORT_PROT: defaultProperties[REPLICATION_TRANSPORT]} + repl_agreement = master.agreement.create(suffix=SUFFIX, host=consumer.host, port=consumer.port, properties=properties) + + if not repl_agreement: + log.fatal("Fail to create a replica agreement") + sys.exit(1) + + log.debug("%s created" % repl_agreement) + master.agreement.init(SUFFIX, HOST_CONSUMER, PORT_CONSUMER) + master.waitForReplInit(repl_agreement) + + # Check replication is working fine + master.add_s(Entry((TEST_REPL_DN, { + 'objectclass': "top person".split(), + 'sn': 'test_repl', + 'cn': 'test_repl'}))) + loop = 0 + while loop <= 10: + try: + ent = consumer.getEntry(TEST_REPL_DN, ldap.SCOPE_BASE, "(objectclass=*)") + break + except ldap.NO_SUCH_OBJECT: + time.sleep(1) + loop += 1 + + # Time to create the backups + master.stop(timeout=10) + master.backupfile = master.backupFS() + master.start(timeout=10) + + consumer.stop(timeout=10) + consumer.backupfile = consumer.backupFS() + consumer.start(timeout=10) + + # clear the tmp directory + master.clearTmpDir(__file__) + + # + # Here we have two instances master and consumer + # with replication working. Either coming from a backup recovery + # or from a fresh (re)init + # Time to return the topology + return TopologyMasterConsumer(master, consumer) + + +def test_ticket47871_init(topology): + """ + Initialize the test environment + """ + topology.master.plugins.enable(name=PLUGIN_RETRO_CHANGELOG) + mod = [(ldap.MOD_REPLACE, 'nsslapd-changelogmaxage', "10s"), # 10 second triming + (ldap.MOD_REPLACE, 'nsslapd-changelog-trim-interval', "5s")] + topology.master.modify_s("cn=%s,%s" % (PLUGIN_RETRO_CHANGELOG, DN_PLUGIN), mod) + #topology.master.plugins.enable(name=PLUGIN_MEMBER_OF) + #topology.master.plugins.enable(name=PLUGIN_REFER_INTEGRITY) + topology.master.stop(timeout=10) + topology.master.start(timeout=10) + + topology.master.log.info("test_ticket47871_init topology %r" % (topology)) + # the test case will check if a warning message is logged in the + # error log of the supplier + topology.master.errorlog_file = open(topology.master.errlog, "r") + + +def test_ticket47871_1(topology): + ''' + ADD entries and check they are all in the retrocl + ''' + # add dummy entries + for cpt in range(MAX_OTHERS): + name = "%s%d" % (OTHER_NAME, cpt) + topology.master.add_s(Entry(("cn=%s,%s" % (name, SUFFIX), { + 'objectclass': "top person".split(), + 'sn': name, + 'cn': name}))) + + topology.master.log.info("test_ticket47871_init: %d entries ADDed %s[0..%d]" % (MAX_OTHERS, OTHER_NAME, MAX_OTHERS-1)) + + # Check the number of entries in the retro changelog + time.sleep(1) + ents = topology.master.search_s(RETROCL_SUFFIX, ldap.SCOPE_ONELEVEL, "(objectclass=*)") + assert len(ents) == MAX_OTHERS + topology.master.log.info("Added entries are") + for ent in ents: + topology.master.log.info("%s" % ent.dn) + +def test_ticket47871_2(topology): + ''' + Wait until there is just a last entries + ''' + MAX_TRIES = 10 + TRY_NO = 1 + while TRY_NO <= MAX_TRIES: + time.sleep(6) # at least 1 trimming occurred + ents = topology.master.search_s(RETROCL_SUFFIX, ldap.SCOPE_ONELEVEL, "(objectclass=*)") + assert len(ents) <= MAX_OTHERS + topology.master.log.info("\nTry no %d it remains %d entries" % (TRY_NO, len(ents))) + for ent in ents: + topology.master.log.info("%s" % ent.dn) + if len(ents) > 1: + TRY_NO += 1 + else: + break + assert TRY_NO <= MAX_TRIES + assert len(ents) <= 1 + + + +def test_ticket47871_final(topology): + topology.master.stop(timeout=10) + topology.consumer.stop(timeout=10) + +def run_isolated(): + ''' + run_isolated is used to run these test cases independently of a test scheduler (xunit, py.test..) + To run isolated without py.test, you need to + - edit this file and comment '@pytest.fixture' line before 'topology' function. + - set the installation prefix + - run this program + ''' + global installation_prefix + installation_prefix = None + + topo = topology(True) + test_ticket47871_init(topo) + test_ticket47871_1(topo) + test_ticket47871_2(topo) + + test_ticket47871_final(topo) + + +if __name__ == '__main__': + run_isolated() + diff --git a/ldap/servers/plugins/retrocl/retrocl.h b/ldap/servers/plugins/retrocl/retrocl.h index 052c369..27f55dc 100644 --- a/ldap/servers/plugins/retrocl/retrocl.h +++ b/ldap/servers/plugins/retrocl/retrocl.h @@ -62,7 +62,7 @@ typedef unsigned long changeNumber; typedef struct _cnum_result_t { int crt_nentries; /* number of entries returned from search */ int crt_err; /* err returned from backend */ - Slapi_Entry *crt_entry; /* The entry returned from the backend */ + time_t crt_time; /* The changetime of the entry retrieved from the backend */ } cnum_result_t;
typedef struct _cnumRet { diff --git a/ldap/servers/plugins/retrocl/retrocl_trim.c b/ldap/servers/plugins/retrocl/retrocl_trim.c index 4d17377..ceb3467 100644 --- a/ldap/servers/plugins/retrocl/retrocl_trim.c +++ b/ldap/servers/plugins/retrocl/retrocl_trim.c @@ -131,51 +131,68 @@ delete_changerecord( changeNumber cnum ) }
/* - * Function: handle_getchangerecord_result + * Function: handle_getchangetime_result * Arguments: op - pointer to Operation struct for this operation * err - error code returned from search * Returns: nothing - * Description: result handler for get_changerecord(). Sets the crt_err + * Description: result handler for get_changetime(). Sets the crt_err * field of the cnum_result_t struct to the error returned * from the backend. */ static void -handle_getchangerecord_result( int err, void *callback_data ) +handle_getchangetime_result( int err, void *callback_data ) { cnum_result_t *crt = callback_data;
if ( crt == NULL ) { slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME, - "handle_getchangerecord_result: callback_data NULL\n" ); + "handle_getchangetime_result: callback_data NULL\n" ); } else { crt->crt_err = err; } }
/* - * Function: handle_getchangerecord_search + * Function: handle_getchangetime_search * Arguments: op - pointer to Operation struct for this operation * e - entry returned by backend * Returns: 0 in all cases - * Description: Search result operation handler for get_changerecord(). + * Description: Search result operation handler for get_changetime(). * Sets fields in the cnum_result_t struct pointed to by * op->o_handler_data. */ static int -handle_getchangerecord_search( Slapi_Entry *e, void *callback_data) +handle_getchangetime_search( Slapi_Entry *e, void *callback_data) { cnum_result_t *crt = callback_data; - - if ( crt == NULL ) { - slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME, - "handle_getchangerecord_search: op->o_handler_data NULL\n" ); - } else if ( crt->crt_nentries > 0 ) { - /* only return the first entry, I guess */ - slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME, - "handle_getchangerecord_search: multiple entries returned\n" ); + int rc; + Slapi_Attr *attr; + + if (crt == NULL) { + slapi_log_error(SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME, + "handle_getchangetime_search: op->o_handler_data NULL\n"); + } else if (crt->crt_nentries > 0) { + /* only return the first entry, I guess */ + slapi_log_error(SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME, + "handle_getchangetime_search: multiple entries returned\n"); } else { - crt->crt_nentries++; - crt->crt_entry = e; + crt->crt_nentries++; + crt->crt_time = 0; + + if (NULL != e) { + Slapi_Value *sval = NULL; + const struct berval *val = NULL; + rc = slapi_entry_attr_find(e, attr_changetime, &attr); + /* Bug 624442: Logic checking for lack of timestamp was + reversed. */ + if (0 != rc || slapi_attr_first_value(attr, &sval) == -1 || + (val = slapi_value_get_berval(sval)) == NULL || + NULL == val->bv_val) { + crt->crt_time = 0; + } else { + crt->crt_time = parse_localTime(val->bv_val); + } + } }
return 0; @@ -183,47 +200,52 @@ handle_getchangerecord_search( Slapi_Entry *e, void *callback_data)
/* - * Function: get_changerecord + * Function: get_changetime * Arguments: cnum - number of change record to retrieve - * Returns: Pointer to an entry structure. The caller must free the entry. - * If "err" is non-NULL, an error code is returned in the memory - * location it points to. - * Description: Retrieve the change record entry whose number is "cnum". + * Returns: Taking the attr_changetime of the 'cnum' entry, + * it converts it into time_t (parse_localTime) and returns this time value. + * It returns 0 in the following cases: + * - changerecord entry has not attr_changetime + * - attr_changetime attribute has no value + * - attr_changetime attribute value is empty + * + * Description: Retrieve attr_changetime ("changetime") from a changerecord whose number is "cnum". */ -static Slapi_Entry *get_changerecord( changeNumber cnum, int *err ) +static time_t get_changetime( changeNumber cnum, int *err ) { - cnum_result_t crt, *crtp = &crt; - char fstr[ 16 + CNUMSTR_LEN + 2 ]; + cnum_result_t crt, *crtp = &crt; + char fstr[ 16 + CNUMSTR_LEN + 2 ]; Slapi_PBlock *pb;
- if ( cnum == 0UL ) { - if ( err != NULL ) { - *err = LDAP_PARAM_ERROR; - } - return NULL; + if (cnum == 0UL) { + if (err != NULL) { + *err = LDAP_PARAM_ERROR; + } + return 0; } - crtp->crt_nentries = crtp->crt_err = 0; crtp->crt_entry = NULL; - PR_snprintf( fstr, sizeof(fstr), "%s=%ld", attr_changenumber, cnum ); - - pb = slapi_pblock_new (); - slapi_search_internal_set_pb (pb, RETROCL_CHANGELOG_DN, - LDAP_SCOPE_SUBTREE, fstr, - (char **)get_cleattrs(), /* cast const */ - 0 /* attrsonly */, - NULL /* controls */, NULL /* uniqueid */, - g_plg_identity[PLUGIN_RETROCL], - 0 /* actions */); - - slapi_search_internal_callback_pb (pb, crtp, - handle_getchangerecord_result, - handle_getchangerecord_search, NULL ); - if ( err != NULL ) { - *err = crtp->crt_err; + crtp->crt_nentries = crtp->crt_err = 0; + crtp->crt_time = 0; + PR_snprintf(fstr, sizeof (fstr), "%s=%ld", attr_changenumber, cnum); + + pb = slapi_pblock_new(); + slapi_search_internal_set_pb(pb, RETROCL_CHANGELOG_DN, + LDAP_SCOPE_SUBTREE, fstr, + (char **) get_cleattrs(), /* cast const */ + 0 /* attrsonly */, + NULL /* controls */, NULL /* uniqueid */, + g_plg_identity[PLUGIN_RETROCL], + 0 /* actions */); + + slapi_search_internal_callback_pb(pb, crtp, + handle_getchangetime_result, + handle_getchangetime_search, NULL); + if (err != NULL) { + *err = crtp->crt_err; }
- slapi_pblock_destroy (pb); + slapi_pblock_destroy(pb);
- return( crtp->crt_entry ); + return ( crtp->crt_time); }
/* @@ -241,7 +263,6 @@ static int trim_changelog(void) int rc = 0, ldrc, done; time_t now; changeNumber first_in_log = 0, last_in_log = 0; - Slapi_Entry *e = NULL; int num_deleted = 0; int me,lt;
@@ -264,7 +285,6 @@ static int trim_changelog(void)
while ( !done && retrocl_trimming == 1 ) { int did_delete; - Slapi_Attr *attr;
did_delete = 0; first_in_log = retrocl_get_first_changenumber(); @@ -282,32 +302,21 @@ static int trim_changelog(void) break; } if ( me > 0L ) { - e = get_changerecord( first_in_log, &ldrc ); - if ( NULL != e ) { - Slapi_Value *sval = NULL; - const struct berval *val = NULL; - rc = slapi_entry_attr_find( e, attr_changetime, &attr ); - /* Bug 624442: Logic checking for lack of timestamp was - reversed. */ - if ( 0 != rc || slapi_attr_first_value( attr,&sval ) == -1 || - (val = slapi_value_get_berval ( sval )) == NULL || - NULL == val->bv_val ) { - /* What to do if there's no timestamp? Just delete it. */ - retrocl_set_first_changenumber( first_in_log + 1 ); - ldrc = delete_changerecord( first_in_log ); - num_deleted++; - did_delete = 1; - } else { - time_t change_time = parse_localTime( val->bv_val ); - if ( change_time + me < now ) { - retrocl_set_first_changenumber( first_in_log + 1 ); - ldrc = delete_changerecord( first_in_log ); - num_deleted++; - did_delete = 1; - } - /* slapi_entry_free( e ); */ /* XXXggood should we be freeing this? */ - } - } + time_t change_time = get_changetime(first_in_log, &ldrc); + if (change_time) { + if ((change_time + me) < now) { + retrocl_set_first_changenumber(first_in_log + 1); + ldrc = delete_changerecord(first_in_log); + num_deleted++; + did_delete = 1; + } + } else { + /* What to do if there's no timestamp? Just delete it. */ + retrocl_set_first_changenumber(first_in_log + 1); + ldrc = delete_changerecord(first_in_log); + num_deleted++; + did_delete = 1; + } } if ( !did_delete ) { done = 1;
389-commits@lists.fedoraproject.org