July 2015 - 389-commits - Fedora Mailing-Lists

Branch '389-ds-base-1.3.4' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5_replica_config.c | 49 ++++++++++------ 1 file changed, 31 insertions(+), 18 deletions(-) New commits: commit 0bb881aea92d64e509cf7604e86559779e4f9b77 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Thu Jul 9 09:59:46 2015 -0400 Ticket 48217 - cleanAllRUV hangs shutdown if not all of the replicas are online Bug Description: There are race conditions where we might not notify the clean task when a shutdown is occurring. This causes the task refcount to be not decremented, which hangs the destructor function. Fix Description: Check that the server is not shutting down before going to sleep, and notify the clean/abort tasks to stop in the destructor functions(instead of in the mmr plugin stop function). https://fedorahosted.org/389/ticket/48217 Reviewed by: lkrispen(Thanks!) (cherry picked from commit d6269f2e6898a187d43e3368860b13cdbd39ec55) diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index faa86b8..446da3f 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1738,7 +1738,9 @@ replica_cleanallruv_thread(void *arg) } if (data->task) { slapi_task_inc_refcount(data->task); - slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanallruv_thread --> refcount incremented.\n"); + slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name, + "replica_cleanallruv_thread --> refcount incremented (%d).\n", + data->task->task_refcount); } /* * Initialize our settings @@ -1871,10 +1873,11 @@ replica_cleanallruv_thread(void *arg) */ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas have received the " "cleanallruv extended op, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); - + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( 
notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -1974,6 +1977,7 @@ done: if(data->repl_obj && free_obj){ object_release(data->repl_obj); } + csn_free(&data->maxcsn); slapi_sdn_free(&data->sdn); slapi_ch_free_string(&data->repl_root); @@ -1987,6 +1991,7 @@ replica_cleanall_ruv_destructor(Slapi_Task *task) { slapi_log_error( SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanall_ruv_destructor -->\n" ); + stop_ruv_cleaning(); if (task) { while (slapi_task_get_refcount(task) > 0) { /* Yield to wait for the fixup task finishes. */ @@ -2002,6 +2007,7 @@ replica_cleanall_ruv_abort_destructor(Slapi_Task *task) { slapi_log_error( SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanall_ruv_abort_destructor -->\n" ); + stop_ruv_cleaning(); if (task) { while (slapi_task_get_refcount(task) > 0) { /* Yield to wait for the fixup task finishes. */ @@ -2055,9 +2061,11 @@ check_replicas_are_done_cleaning(cleanruv_data *data ) break; } cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -2158,9 +2166,11 @@ check_replicas_are_done_aborting(cleanruv_data *data ) break; } cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } 
if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -2212,10 +2222,11 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) } cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas caught up, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); - + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -2271,10 +2282,12 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) } cleanruv_log(task, rid, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...", interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else {

8 years, 11 months

1
0
0 / 0

ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5_replica_config.c | 49 ++++++++++------ 1 file changed, 31 insertions(+), 18 deletions(-) New commits: commit d6269f2e6898a187d43e3368860b13cdbd39ec55 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Thu Jul 9 09:59:46 2015 -0400 Ticket 48217 - cleanAllRUV hangs shutdown if not all of the replicas are online Bug Description: There are race conditions where we might not notify the clean task when a shutdown is occurring. This causes the task refcount to be not decremented, which hangs the destructor function. Fix Description: Check that the server is not shutting down before going to sleep, and notify the clean/abort tasks to stop in the destructor functions(instead of in the mmr plugin stop function). https://fedorahosted.org/389/ticket/48217 Reviewed by: lkrispen(Thanks!) diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index faa86b8..446da3f 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1738,7 +1738,9 @@ replica_cleanallruv_thread(void *arg) } if (data->task) { slapi_task_inc_refcount(data->task); - slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanallruv_thread --> refcount incremented.\n"); + slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name, + "replica_cleanallruv_thread --> refcount incremented (%d).\n", + data->task->task_refcount); } /* * Initialize our settings @@ -1871,10 +1873,11 @@ replica_cleanallruv_thread(void *arg) */ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas have received the " "cleanallruv extended op, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); - + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( 
notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -1974,6 +1977,7 @@ done: if(data->repl_obj && free_obj){ object_release(data->repl_obj); } + csn_free(&data->maxcsn); slapi_sdn_free(&data->sdn); slapi_ch_free_string(&data->repl_root); @@ -1987,6 +1991,7 @@ replica_cleanall_ruv_destructor(Slapi_Task *task) { slapi_log_error( SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanall_ruv_destructor -->\n" ); + stop_ruv_cleaning(); if (task) { while (slapi_task_get_refcount(task) > 0) { /* Yield to wait for the fixup task finishes. */ @@ -2002,6 +2007,7 @@ replica_cleanall_ruv_abort_destructor(Slapi_Task *task) { slapi_log_error( SLAPI_LOG_PLUGIN, repl_plugin_name, "replica_cleanall_ruv_abort_destructor -->\n" ); + stop_ruv_cleaning(); if (task) { while (slapi_task_get_refcount(task) > 0) { /* Yield to wait for the fixup task finishes. */ @@ -2055,9 +2061,11 @@ check_replicas_are_done_cleaning(cleanruv_data *data ) break; } cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas finished cleaning, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -2158,9 +2166,11 @@ check_replicas_are_done_aborting(cleanruv_data *data ) break; } cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Not all replicas finished aborting, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval 
* 2; } else { @@ -2212,10 +2222,11 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) } cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Not all replicas caught up, retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); - + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else { @@ -2271,10 +2282,12 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) } cleanruv_log(task, rid, CLEANALLRUV_ID, "Not all replicas online, retrying in %d seconds...", interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if(!slapi_is_shutting_down()){ + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; } else {

8 years, 11 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - dirsrvtests/tickets ldap/servers

by Mark Reynolds

dirsrvtests/tickets/ticket48013_test.py | 134 +++++++++++++++++++++++++++++++ ldap/servers/plugins/sync/sync_refresh.c | 7 - 2 files changed, 138 insertions(+), 3 deletions(-) New commits: commit 41dff5ba7a6368bfb2d8a2057dd5ba5b6a91d175 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Wed Jul 8 17:21:57 2015 -0400 Ticket 48013 - Inconsistent behaviour of DS when LDAP Sync is used with an invalid cookie Bug Description: Some invalid cookies are treated as errors, while others are not. Fix Description: Perform the cookie parsing and validation in the same step. This gives consistent results. https://fedorahosted.org/389/ticket/48013 Reviewed by: nhosoi(Thanks!) (cherry picked from commit fdf46817fcc3b334bd477316d253bc18f243c0f6) diff --git a/dirsrvtests/tickets/ticket48013_test.py b/dirsrvtests/tickets/ticket48013_test.py new file mode 100644 index 0000000..0ccdeba --- /dev/null +++ b/dirsrvtests/tickets/ticket48013_test.py @@ -0,0 +1,134 @@ +import os +import sys +import time +import ldap +import logging +import pytest +import pyasn1 +import pyasn1_modules +import ldap,ldapurl +from ldap.ldapobject import SimpleLDAPObject +from ldap.syncrepl import SyncreplConsumer +from lib389 import DirSrv, Entry, tools, tasks +from lib389.tools import DirSrvTools +from lib389._constants import * +from lib389.properties import * +from lib389.tasks import * +from lib389.utils import * + +logging.getLogger(__name__).setLevel(logging.DEBUG) +log = logging.getLogger(__name__) + +installation1_prefix = None + + +class TopologyStandalone(object): + def __init__(self, standalone): + standalone.open() + self.standalone = standalone + + +class SyncObject(SimpleLDAPObject, SyncreplConsumer): + def __init__(self, uri): + # Init the ldap connection + SimpleLDAPObject.__init__(self, uri) + + def sync_search(self, test_cookie): + self.syncrepl_search('dc=example,dc=com', ldap.SCOPE_SUBTREE, + filterstr='(objectclass=*)', mode='refreshOnly', + cookie=test_cookie) + + def poll(self): + 
self.syncrepl_poll(all=1) + + +(a)pytest.fixture(scope="module") +def topology(request): + global installation1_prefix + if installation1_prefix: + args_instance[SER_DEPLOYED_DIR] = installation1_prefix + + # Creating standalone instance ... + standalone = DirSrv(verbose=False) + args_instance[SER_HOST] = HOST_STANDALONE + args_instance[SER_PORT] = PORT_STANDALONE + args_instance[SER_SERVERID_PROP] = SERVERID_STANDALONE + args_instance[SER_CREATION_SUFFIX] = DEFAULT_SUFFIX + args_standalone = args_instance.copy() + standalone.allocate(args_standalone) + instance_standalone = standalone.exists() + if instance_standalone: + standalone.delete() + standalone.create() + standalone.open() + + # Clear out the tmp dir + standalone.clearTmpDir(__file__) + + return TopologyStandalone(standalone) + + +def test_ticket48013(topology): + ''' + Content Synchonization: Test that invalid cookies are caught + ''' + + cookies = ('#', '##', 'a#a#a', 'a#a#1') + + # Enable dynamic plugins + try: + topology.standalone.modify_s(DN_CONFIG, [(ldap.MOD_REPLACE, 'nsslapd-dynamic-plugins', 'on')]) + except ldap.LDAPError as e: + ldap.error('Failed to enable dynamic plugin!' 
+ e.message['desc']) + assert False + + # Enable retro changelog + topology.standalone.plugins.enable(name=PLUGIN_RETRO_CHANGELOG) + + # Enbale content sync plugin + topology.standalone.plugins.enable(name=PLUGIN_REPL_SYNC) + + # Set everything up + ldap_url = ldapurl.LDAPUrl('ldap://localhost:31389') + ldap_connection = SyncObject(ldap_url.initializeUrl()) + + # Authenticate + try: + ldap_connection.simple_bind_s(DN_DM, PASSWORD) + except ldap.LDAPError as e: + print('Login to LDAP server failed: %s' % e.message['desc']) + assert False + + # Test invalid cookies + for invalid_cookie in cookies: + log.info('Testing cookie: %s' % invalid_cookie) + try: + ldap_connection.sync_search(invalid_cookie) + ldap_connection.poll() + log.fatal('Invalid cookie accepted!') + assert False + except Exception as e: + log.info('Invalid cookie correctly rejected: %s' % e.message['info']) + pass + + # Success + log.info('Test complete') + + +def test_ticket48013_final(topology): + topology.standalone.delete() + log.info('Testcase PASSED') + + +def run_isolated(): + global installation1_prefix + installation1_prefix = None + + topo = topology(True) + test_ticket48013(topo) + test_ticket48013_final(topo) + + +if __name__ == '__main__': + run_isolated() + diff --git a/ldap/servers/plugins/sync/sync_refresh.c b/ldap/servers/plugins/sync/sync_refresh.c index 1ae2604..beb87ab 100644 --- a/ldap/servers/plugins/sync/sync_refresh.c +++ b/ldap/servers/plugins/sync/sync_refresh.c @@ -113,9 +113,10 @@ int sync_srch_refresh_pre_search(Slapi_PBlock *pb) * -- return e-syncRefreshRequired if the data referenced in the cookie are no * longer in the history */ - if (cookie && - ( client_cookie = sync_cookie_parse (cookie))) { - if (sync_cookie_isvalid(client_cookie, session_cookie)) { + if (cookie) { + if ((client_cookie = sync_cookie_parse (cookie)) && + sync_cookie_isvalid(client_cookie, session_cookie)) + { rc = sync_refresh_update_content(pb, client_cookie, session_cookie); if (rc == 0) 
entries_sent = 1;

8 years, 11 months

1
0
0 / 0

dirsrvtests/tickets ldap/servers

by Mark Reynolds

dirsrvtests/tickets/ticket48013_test.py | 134 +++++++++++++++++++++++++++++++ ldap/servers/plugins/sync/sync_refresh.c | 7 - 2 files changed, 138 insertions(+), 3 deletions(-) New commits: commit fdf46817fcc3b334bd477316d253bc18f243c0f6 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Wed Jul 8 17:21:57 2015 -0400 Ticket 48013 - Inconsistent behaviour of DS when LDAP Sync is used with an invalid cookie Bug Description: Some invalid cookies are treated as errors, while others are not. Fix Description: Perform the cookie parsing and validation in the same step. This gives consistent results. https://fedorahosted.org/389/ticket/48013 Reviewed by: nhosoi(Thanks!) diff --git a/dirsrvtests/tickets/ticket48013_test.py b/dirsrvtests/tickets/ticket48013_test.py new file mode 100644 index 0000000..0ccdeba --- /dev/null +++ b/dirsrvtests/tickets/ticket48013_test.py @@ -0,0 +1,134 @@ +import os +import sys +import time +import ldap +import logging +import pytest +import pyasn1 +import pyasn1_modules +import ldap,ldapurl +from ldap.ldapobject import SimpleLDAPObject +from ldap.syncrepl import SyncreplConsumer +from lib389 import DirSrv, Entry, tools, tasks +from lib389.tools import DirSrvTools +from lib389._constants import * +from lib389.properties import * +from lib389.tasks import * +from lib389.utils import * + +logging.getLogger(__name__).setLevel(logging.DEBUG) +log = logging.getLogger(__name__) + +installation1_prefix = None + + +class TopologyStandalone(object): + def __init__(self, standalone): + standalone.open() + self.standalone = standalone + + +class SyncObject(SimpleLDAPObject, SyncreplConsumer): + def __init__(self, uri): + # Init the ldap connection + SimpleLDAPObject.__init__(self, uri) + + def sync_search(self, test_cookie): + self.syncrepl_search('dc=example,dc=com', ldap.SCOPE_SUBTREE, + filterstr='(objectclass=*)', mode='refreshOnly', + cookie=test_cookie) + + def poll(self): + self.syncrepl_poll(all=1) + + +(a)pytest.fixture(scope="module") +def 
topology(request): + global installation1_prefix + if installation1_prefix: + args_instance[SER_DEPLOYED_DIR] = installation1_prefix + + # Creating standalone instance ... + standalone = DirSrv(verbose=False) + args_instance[SER_HOST] = HOST_STANDALONE + args_instance[SER_PORT] = PORT_STANDALONE + args_instance[SER_SERVERID_PROP] = SERVERID_STANDALONE + args_instance[SER_CREATION_SUFFIX] = DEFAULT_SUFFIX + args_standalone = args_instance.copy() + standalone.allocate(args_standalone) + instance_standalone = standalone.exists() + if instance_standalone: + standalone.delete() + standalone.create() + standalone.open() + + # Clear out the tmp dir + standalone.clearTmpDir(__file__) + + return TopologyStandalone(standalone) + + +def test_ticket48013(topology): + ''' + Content Synchonization: Test that invalid cookies are caught + ''' + + cookies = ('#', '##', 'a#a#a', 'a#a#1') + + # Enable dynamic plugins + try: + topology.standalone.modify_s(DN_CONFIG, [(ldap.MOD_REPLACE, 'nsslapd-dynamic-plugins', 'on')]) + except ldap.LDAPError as e: + ldap.error('Failed to enable dynamic plugin!' 
+ e.message['desc']) + assert False + + # Enable retro changelog + topology.standalone.plugins.enable(name=PLUGIN_RETRO_CHANGELOG) + + # Enbale content sync plugin + topology.standalone.plugins.enable(name=PLUGIN_REPL_SYNC) + + # Set everything up + ldap_url = ldapurl.LDAPUrl('ldap://localhost:31389') + ldap_connection = SyncObject(ldap_url.initializeUrl()) + + # Authenticate + try: + ldap_connection.simple_bind_s(DN_DM, PASSWORD) + except ldap.LDAPError as e: + print('Login to LDAP server failed: %s' % e.message['desc']) + assert False + + # Test invalid cookies + for invalid_cookie in cookies: + log.info('Testing cookie: %s' % invalid_cookie) + try: + ldap_connection.sync_search(invalid_cookie) + ldap_connection.poll() + log.fatal('Invalid cookie accepted!') + assert False + except Exception as e: + log.info('Invalid cookie correctly rejected: %s' % e.message['info']) + pass + + # Success + log.info('Test complete') + + +def test_ticket48013_final(topology): + topology.standalone.delete() + log.info('Testcase PASSED') + + +def run_isolated(): + global installation1_prefix + installation1_prefix = None + + topo = topology(True) + test_ticket48013(topo) + test_ticket48013_final(topo) + + +if __name__ == '__main__': + run_isolated() + diff --git a/ldap/servers/plugins/sync/sync_refresh.c b/ldap/servers/plugins/sync/sync_refresh.c index 1ae2604..beb87ab 100644 --- a/ldap/servers/plugins/sync/sync_refresh.c +++ b/ldap/servers/plugins/sync/sync_refresh.c @@ -113,9 +113,10 @@ int sync_srch_refresh_pre_search(Slapi_PBlock *pb) * -- return e-syncRefreshRequired if the data referenced in the cookie are no * longer in the history */ - if (cookie && - ( client_cookie = sync_cookie_parse (cookie))) { - if (sync_cookie_isvalid(client_cookie, session_cookie)) { + if (cookie) { + if ((client_cookie = sync_cookie_parse (cookie)) && + sync_cookie_isvalid(client_cookie, session_cookie)) + { rc = sync_refresh_update_content(pb, client_cookie, session_cookie); if (rc == 0) 
entries_sent = 1;

8 years, 11 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - ldap/servers

by Noriko Hosoi

ldap/servers/slapd/tools/ldclt/ldapfct.c | 4 ++ ldap/servers/slapd/tools/ldclt/ldclt.c | 35 ++++++++++++++++---- ldap/servers/slapd/tools/ldclt/ldclt.h | 11 +++++- ldap/servers/slapd/tools/ldclt/threadMain.c | 48 ++++++++++++++++++---------- 4 files changed, 73 insertions(+), 25 deletions(-) New commits: commit 0680a45773ab4b0e92ec26caa3acbb6bab379103 Author: Noriko Hosoi <nhosoi(a)redhat.com> Date: Wed Jul 8 10:19:15 2015 -0700 Ticket #47799 - Any negative LDAP error code number reported as Illegal error by ldclt. Description: ldclt was implemented with mozldap, which did not expect negative error codes, but openldap does. E.g., LDAP_FILTER_ERROR (-7) This patch prepares a negativeError array for the negative error codes. Example: $ ldclt [...] -e esearch -e random -b "<basedn>" -f "<bad filter>" -v Filter = "<bad filter>" ... ldclt[16030]: T000: Cannot ldap_search(), error=-7 (Bad search filter) -- NULL result ... ldclt[16030]: Global error -7 (Bad search filter) occurs 1001 times ldclt[16030]: Exit status 3 - Max errors reached. https://fedorahosted.org/389/ticket/47799 Reviewed by mreynolds(a)redhat.com (Thank you, Mark!!) (cherry picked from commit 71be5faaa478593bb056887410ca8e48e05b2fe4) diff --git a/ldap/servers/slapd/tools/ldclt/ldapfct.c b/ldap/servers/slapd/tools/ldclt/ldapfct.c index f906c5a..13e66b8 100644 --- a/ldap/servers/slapd/tools/ldclt/ldapfct.c +++ b/ldap/servers/slapd/tools/ldclt/ldapfct.c @@ -1382,6 +1382,10 @@ printErrorFromLdap ( printf ("ldclt[%d]: T%03d: %s, error=%d (%s", mctx.pid, tttctx->thrdNum, errmsg, errcode, my_ldap_err2string (errcode)); + if (!res) { + printf (") -- NULL result\n"); + return -1; + } /* * See if there is an additional error message... 
diff --git a/ldap/servers/slapd/tools/ldclt/ldclt.c b/ldap/servers/slapd/tools/ldclt/ldclt.c index edb687f..9e573a5 100644 --- a/ldap/servers/slapd/tools/ldclt/ldclt.c +++ b/ldap/servers/slapd/tools/ldclt/ldclt.c @@ -716,19 +716,35 @@ printGlobalStatistics (void) * Note: Maybe implement a way to stop the running threads ? */ found = 0; - for (i=0 ; i<MAX_ERROR_NB ; i++) - if (mctx.errors[i] > 0) - { + for (i = 0; i < MAX_ERROR_NB; i++) { + if (mctx.errors[i] > 0) { found = 1; sprintf (buf, "(%s)", my_ldap_err2string (i)); printf ("ldclt[%d]: Global error %2d %s occurs %5d times\n", mctx.pid, i, buf, mctx.errors[i]); } + } +#if defined(USE_OPENLDAP) + for (i = 0; i < ABS(NEGATIVE_MAX_ERROR_NB); i++) { + if (mctx.negativeErrors[i] > 0) { + found = 1; + sprintf (buf, "(%s)", my_ldap_err2string (-i)); + printf ("ldclt[%d]: Global error %2d %s occurs %5d times\n", + mctx.pid, -i, buf, mctx.negativeErrors[i]); + } + } +#endif if (mctx.errorsBad > 0) { found = 1; - printf ("ldclt[%d]: Global illegal errors (codes not in [0, %d]) occurs %5d times\n", - mctx.pid, MAX_ERROR_NB-1, mctx.errorsBad); + printf("ldclt[%d]: Global illegal errors (codes not in [%d, %d]) occurs %5d times\n", + mctx.pid, +#if defined(USE_OPENLDAP) + NEGATIVE_MAX_ERROR_NB, +#else + 0, +#endif + MAX_ERROR_NB-1, mctx.errorsBad); } if (!found) printf ("ldclt[%d]: Global no error occurs during this session.\n", mctx.pid); @@ -1293,9 +1309,14 @@ basicInit (void) mctx.totNbOpers = 0; mctx.totNbSamples = 0; mctx.errorsBad = 0; - for (i=0 ; i<MAX_ERROR_NB ; i++) + for (i = 0; i < MAX_ERROR_NB; i++) { mctx.errors[i] = 0; - + } +#if defined(USE_OPENLDAP) + for (i = 0; i < ABS(NEGATIVE_MAX_ERROR_NB); i++) { + mctx.negativeErrors[i] = 0; + } +#endif /* * Initiate the mutex that protect the errors statistics */ diff --git a/ldap/servers/slapd/tools/ldclt/ldclt.h b/ldap/servers/slapd/tools/ldclt/ldclt.h index a48ab79..4f8f485 100644 --- a/ldap/servers/slapd/tools/ldclt/ldclt.h +++ 
b/ldap/servers/slapd/tools/ldclt/ldclt.h @@ -169,6 +169,9 @@ dd/mm/yy | Author | Comments #ifndef LDCLT_H #define LDCLT_H +#if defined(USE_OPENLDAP) +#define ABS(x) ((x > 0) ? (x) : (-x)) +#endif /* * Misc constant definitions */ @@ -183,7 +186,10 @@ dd/mm/yy | Author | Comments #define DEF_PORT_CHECK 16000 /* Port used for check processing */ #define MAX_ATTRIBS 40 /* Max number of attributes */ /*JLS 28-03-01*/ #define MAX_DN_LENGTH 1024 /* Max length for a DN */ -#define MAX_ERROR_NB 0x62 /* Max ldap err number + 1 */ +#define MAX_ERROR_NB 0x7b /* Max ldap err number + 1 */ +#if defined(USE_OPENLDAP) +#define NEGATIVE_MAX_ERROR_NB (LDAP_X_CONNECTING - 1) /* Mininum ldap err number */ +#endif #define MAX_IGN_ERRORS 20 /* Max errors ignored */ #define MAX_FILTER 512 /* Max filters length */ #define MAX_THREADS 1000 /* Max number of threads */ /*JLS 21-11-00*/ @@ -504,6 +510,9 @@ typedef struct main_context { char *certfile; /* certificate file */ /* BK 11-10-00 */ char *cltcertname; /* client cert name */ /* BK 23 11-00 */ data_list_file *dlf; /* Data list files */ /*JLS 23-03-01*/ +#if defined(USE_OPENLDAP) + int negativeErrors[ABS(NEGATIVE_MAX_ERROR_NB)]; /* Err stats */ +#endif int errors[MAX_ERROR_NB]; /* Err stats */ int errorsBad; /* Bad errors */ ldclt_mutex_t errors_mutex; /* Protect errors */ /*JLS 28-11-00*/ diff --git a/ldap/servers/slapd/tools/ldclt/threadMain.c b/ldap/servers/slapd/tools/ldclt/threadMain.c index be41186..5d915fd 100644 --- a/ldap/servers/slapd/tools/ldclt/threadMain.c +++ b/ldap/servers/slapd/tools/ldclt/threadMain.c @@ -430,14 +430,26 @@ addErrorStat ( /* * Update the counters */ +#if defined(USE_OPENLDAP) + if ((err <= NEGATIVE_MAX_ERROR_NB) || (err >= MAX_ERROR_NB)) +#else if ((err <= 0) || (err >= MAX_ERROR_NB)) +#endif { fprintf (stderr, "ldclt[%d]: Illegal error number %d\n", mctx.pid, err); fflush (stderr); mctx.errorsBad++; } +#if defined(USE_OPENLDAP) + else if (err < 0) + { + mctx.negativeErrors[abs(err)]++; + } +#endif else 
+ { mctx.errors[err]++; + } /* * Release the mutex @@ -460,26 +472,28 @@ addErrorStat ( * Ok, we should not ignore this error... * Maybe the limit is reached ? */ +#if defined(USE_OPENLDAP) + if ((err <= NEGATIVE_MAX_ERROR_NB) || (err >= MAX_ERROR_NB)) +#else if ((err <= 0) || (err >= MAX_ERROR_NB)) - { - if (mctx.errorsBad > mctx.maxErrors) - { - printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); - (void) printGlobalStatistics(); /*JLS 25-08-00*/ - fflush (stdout); - ldclt_sleep (5); - ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ +#endif + { + if (mctx.errorsBad > mctx.maxErrors) { + printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); + (void) printGlobalStatistics(); /*JLS 25-08-00*/ + fflush (stdout); + ldclt_sleep (5); + ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ } - } - else - if (mctx.errors[err] > mctx.maxErrors) - { - printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); - (void) printGlobalStatistics(); /*JLS 25-08-00*/ - fflush (stdout); - ldclt_sleep (5); - ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ + } else { + if (mctx.errors[err] + mctx.negativeErrors[abs(err)] > mctx.maxErrors) { + printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); + (void) printGlobalStatistics(); /*JLS 25-08-00*/ + fflush (stdout); + ldclt_sleep (5); + ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ } + } } /*

8 years, 11 months

1
0
0 / 0

ldap/servers

by Noriko Hosoi

ldap/servers/slapd/tools/ldclt/ldapfct.c | 4 ++ ldap/servers/slapd/tools/ldclt/ldclt.c | 35 ++++++++++++++++---- ldap/servers/slapd/tools/ldclt/ldclt.h | 11 +++++- ldap/servers/slapd/tools/ldclt/threadMain.c | 48 ++++++++++++++++++---------- 4 files changed, 73 insertions(+), 25 deletions(-) New commits: commit 71be5faaa478593bb056887410ca8e48e05b2fe4 Author: Noriko Hosoi <nhosoi(a)redhat.com> Date: Wed Jul 8 10:19:15 2015 -0700 Ticket #47799 - Any negative LDAP error code number reported as Illegal error by ldclt. Description: ldclt was implemented with mozldap, which did not expect negative error codes, but openldap does. E.g., LDAP_FILTER_ERROR (-7) This patch prepares a negativeError array for the negative error codes. Example: $ ldclt [...] -e esearch -e random -b "<basedn>" -f "<bad filter>" -v Filter = "<bad filter>" ... ldclt[16030]: T000: Cannot ldap_search(), error=-7 (Bad search filter) -- NULL result ... ldclt[16030]: Global error -7 (Bad search filter) occurs 1001 times ldclt[16030]: Exit status 3 - Max errors reached. https://fedorahosted.org/389/ticket/47799 Reviewed by mreynolds(a)redhat.com (Thank you, Mark!!) diff --git a/ldap/servers/slapd/tools/ldclt/ldapfct.c b/ldap/servers/slapd/tools/ldclt/ldapfct.c index f906c5a..13e66b8 100644 --- a/ldap/servers/slapd/tools/ldclt/ldapfct.c +++ b/ldap/servers/slapd/tools/ldclt/ldapfct.c @@ -1382,6 +1382,10 @@ printErrorFromLdap ( printf ("ldclt[%d]: T%03d: %s, error=%d (%s", mctx.pid, tttctx->thrdNum, errmsg, errcode, my_ldap_err2string (errcode)); + if (!res) { + printf (") -- NULL result\n"); + return -1; + } /* * See if there is an additional error message... diff --git a/ldap/servers/slapd/tools/ldclt/ldclt.c b/ldap/servers/slapd/tools/ldclt/ldclt.c index edb687f..9e573a5 100644 --- a/ldap/servers/slapd/tools/ldclt/ldclt.c +++ b/ldap/servers/slapd/tools/ldclt/ldclt.c @@ -716,19 +716,35 @@ printGlobalStatistics (void) * Note: Maybe implement a way to stop the running threads ? 
*/ found = 0; - for (i=0 ; i<MAX_ERROR_NB ; i++) - if (mctx.errors[i] > 0) - { + for (i = 0; i < MAX_ERROR_NB; i++) { + if (mctx.errors[i] > 0) { found = 1; sprintf (buf, "(%s)", my_ldap_err2string (i)); printf ("ldclt[%d]: Global error %2d %s occurs %5d times\n", mctx.pid, i, buf, mctx.errors[i]); } + } +#if defined(USE_OPENLDAP) + for (i = 0; i < ABS(NEGATIVE_MAX_ERROR_NB); i++) { + if (mctx.negativeErrors[i] > 0) { + found = 1; + sprintf (buf, "(%s)", my_ldap_err2string (-i)); + printf ("ldclt[%d]: Global error %2d %s occurs %5d times\n", + mctx.pid, -i, buf, mctx.negativeErrors[i]); + } + } +#endif if (mctx.errorsBad > 0) { found = 1; - printf ("ldclt[%d]: Global illegal errors (codes not in [0, %d]) occurs %5d times\n", - mctx.pid, MAX_ERROR_NB-1, mctx.errorsBad); + printf("ldclt[%d]: Global illegal errors (codes not in [%d, %d]) occurs %5d times\n", + mctx.pid, +#if defined(USE_OPENLDAP) + NEGATIVE_MAX_ERROR_NB, +#else + 0, +#endif + MAX_ERROR_NB-1, mctx.errorsBad); } if (!found) printf ("ldclt[%d]: Global no error occurs during this session.\n", mctx.pid); @@ -1293,9 +1309,14 @@ basicInit (void) mctx.totNbOpers = 0; mctx.totNbSamples = 0; mctx.errorsBad = 0; - for (i=0 ; i<MAX_ERROR_NB ; i++) + for (i = 0; i < MAX_ERROR_NB; i++) { mctx.errors[i] = 0; - + } +#if defined(USE_OPENLDAP) + for (i = 0; i < ABS(NEGATIVE_MAX_ERROR_NB); i++) { + mctx.negativeErrors[i] = 0; + } +#endif /* * Initiate the mutex that protect the errors statistics */ diff --git a/ldap/servers/slapd/tools/ldclt/ldclt.h b/ldap/servers/slapd/tools/ldclt/ldclt.h index a48ab79..4f8f485 100644 --- a/ldap/servers/slapd/tools/ldclt/ldclt.h +++ b/ldap/servers/slapd/tools/ldclt/ldclt.h @@ -169,6 +169,9 @@ dd/mm/yy | Author | Comments #ifndef LDCLT_H #define LDCLT_H +#if defined(USE_OPENLDAP) +#define ABS(x) ((x > 0) ? 
(x) : (-x)) +#endif /* * Misc constant definitions */ @@ -183,7 +186,10 @@ dd/mm/yy | Author | Comments #define DEF_PORT_CHECK 16000 /* Port used for check processing */ #define MAX_ATTRIBS 40 /* Max number of attributes */ /*JLS 28-03-01*/ #define MAX_DN_LENGTH 1024 /* Max length for a DN */ -#define MAX_ERROR_NB 0x62 /* Max ldap err number + 1 */ +#define MAX_ERROR_NB 0x7b /* Max ldap err number + 1 */ +#if defined(USE_OPENLDAP) +#define NEGATIVE_MAX_ERROR_NB (LDAP_X_CONNECTING - 1) /* Mininum ldap err number */ +#endif #define MAX_IGN_ERRORS 20 /* Max errors ignored */ #define MAX_FILTER 512 /* Max filters length */ #define MAX_THREADS 1000 /* Max number of threads */ /*JLS 21-11-00*/ @@ -504,6 +510,9 @@ typedef struct main_context { char *certfile; /* certificate file */ /* BK 11-10-00 */ char *cltcertname; /* client cert name */ /* BK 23 11-00 */ data_list_file *dlf; /* Data list files */ /*JLS 23-03-01*/ +#if defined(USE_OPENLDAP) + int negativeErrors[ABS(NEGATIVE_MAX_ERROR_NB)]; /* Err stats */ +#endif int errors[MAX_ERROR_NB]; /* Err stats */ int errorsBad; /* Bad errors */ ldclt_mutex_t errors_mutex; /* Protect errors */ /*JLS 28-11-00*/ diff --git a/ldap/servers/slapd/tools/ldclt/threadMain.c b/ldap/servers/slapd/tools/ldclt/threadMain.c index be41186..5d915fd 100644 --- a/ldap/servers/slapd/tools/ldclt/threadMain.c +++ b/ldap/servers/slapd/tools/ldclt/threadMain.c @@ -430,14 +430,26 @@ addErrorStat ( /* * Update the counters */ +#if defined(USE_OPENLDAP) + if ((err <= NEGATIVE_MAX_ERROR_NB) || (err >= MAX_ERROR_NB)) +#else if ((err <= 0) || (err >= MAX_ERROR_NB)) +#endif { fprintf (stderr, "ldclt[%d]: Illegal error number %d\n", mctx.pid, err); fflush (stderr); mctx.errorsBad++; } +#if defined(USE_OPENLDAP) + else if (err < 0) + { + mctx.negativeErrors[abs(err)]++; + } +#endif else + { mctx.errors[err]++; + } /* * Release the mutex @@ -460,26 +472,28 @@ addErrorStat ( * Ok, we should not ignore this error... * Maybe the limit is reached ? 
*/ +#if defined(USE_OPENLDAP) + if ((err <= NEGATIVE_MAX_ERROR_NB) || (err >= MAX_ERROR_NB)) +#else if ((err <= 0) || (err >= MAX_ERROR_NB)) - { - if (mctx.errorsBad > mctx.maxErrors) - { - printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); - (void) printGlobalStatistics(); /*JLS 25-08-00*/ - fflush (stdout); - ldclt_sleep (5); - ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ +#endif + { + if (mctx.errorsBad > mctx.maxErrors) { + printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); + (void) printGlobalStatistics(); /*JLS 25-08-00*/ + fflush (stdout); + ldclt_sleep (5); + ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ } - } - else - if (mctx.errors[err] > mctx.maxErrors) - { - printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); - (void) printGlobalStatistics(); /*JLS 25-08-00*/ - fflush (stdout); - ldclt_sleep (5); - ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ + } else { + if (mctx.errors[err] + mctx.negativeErrors[abs(err)] > mctx.maxErrors) { + printf ("ldclt[%d]: Max error limit reached - exiting.\n", mctx.pid); + (void) printGlobalStatistics(); /*JLS 25-08-00*/ + fflush (stdout); + ldclt_sleep (5); + ldcltExit (EXIT_MAX_ERRORS); /*JLS 25-08-00*/ } + } } /*

8 years, 11 months

1
0
0 / 0

ldap/admin

by Mark Reynolds

ldap/admin/src/scripts/ds-logpipe.py | 70 +++++++++++++++++----------------- ldap/admin/src/scripts/failedbinds.py | 12 ++++- ldap/admin/src/scripts/logregex.py | 2 3 files changed, 46 insertions(+), 38 deletions(-) New commits: commit 674eac235a72565075129f125e3aefdecb033a05 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Wed Jul 8 14:25:04 2015 -0400 Ticket 48204 - Add Python 3 compatibility to ds-logpipe From: Petr Viktorin <pviktori(a)redhat.com> Description: - Use 'as' syntax when catching exceptions - Use 0o... syntax for octal literals - Don't use unbuffered text files in Python 3 https://fedorahosted.org/389/ticket/48204 Reviewed by: mreynolds diff --git a/ldap/admin/src/scripts/ds-logpipe.py b/ldap/admin/src/scripts/ds-logpipe.py index b2d8304..ca6c27f 100644 --- a/ldap/admin/src/scripts/ds-logpipe.py +++ b/ldap/admin/src/scripts/ds-logpipe.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import print_function + import sys import os, os.path import errno @@ -11,7 +13,7 @@ import fcntl import pwd maxlines = 1000 # set on command line -S_IFIFO = 0010000 +S_IFIFO = 0o010000 buffer = [] # default circular buffer used by default plugin totallines = 0 @@ -29,8 +31,8 @@ def defaultplugin(line): def printbuffer(): sys.stdout.writelines(buffer) - print "Read %d total lines" % totallines - print logfname, "=" * 60 + print("Read %d total lines" % totallines) + print(logfname, "=" * 60) sys.stdout.flush() def defaultpost(): printbuffer() @@ -51,7 +53,7 @@ def sighandler(signum, frame): signal.signal(signal.SIGTERM, signal.SIG_DFL) signal.signal(signal.SIGALRM, signal.SIG_DFL) if signum == signal.SIGALRM and debug: - print "script timed out waiting to open pipe" + print("script timed out waiting to open pipe") finish() else: printbuffer() @@ -126,7 +128,7 @@ def parse_plugins(parser, options, args): newargs.append(arg) if prefunc: if debug: - print 'Calling "pre" function in', plgfile + print('Calling "pre" function in', plgfile) if not prefunc(bvals): 
parser.error('the "pre" function in %s returned an error' % plgfile) args = newargs @@ -140,27 +142,27 @@ def open_pipe(logfname): try: logf = open(logfname, 'r') # blocks until there is some input opencompleted = True - except IOError, e: + except IOError as e: if e.errno == errno.EINTR: continue # open was interrupted, try again else: # hard error - raise Exception, "%s [%d]" % (e.strerror, e.errno) + raise Exception("%s [%d]" % (e.strerror, e.errno)) return logf def is_proc_alive(procpid): retval = False try: retval = os.path.exists("/proc/%d" % procpid) - except IOError, e: + except IOError as e: if e.errno != errno.ENOENT: # may not exist yet - that's ok # otherwise, probably permissions or other badness - raise Exception, "could not open file %s - %s [%d]" % (procfile, e.strerror, e.errno) + raise Exception("could not open file %s - %s [%d]" % (procfile, e.strerror, e.errno)) # using /proc/pid failed, try kill if not retval: try: os.kill(procpid, 0) # sig 0 is a "ping" retval = True # if we got here, proc exists - except OSError, e: + except OSError as e: pass # no such process, or EPERM/EACCES return retval @@ -172,10 +174,10 @@ def get_pid_from_file(pidfile): pfd = open(pidfile, 'r') line = pfd.readline() pfd.close() - except IOError, e: + except IOError as e: if e.errno != errno.ENOENT: # may not exist yet - that's ok # otherwise, probably permissions or other badness - raise Exception, "Could not read pid from file %s - %s [%d]" % (pidfile, e.strerror, e.errno) + raise Exception("Could not read pid from file %s - %s [%d]" % (pidfile, e.strerror, e.errno)) if line: procpid = int(line) return procpid @@ -185,8 +187,8 @@ def write_pid_file(pidfile): pfd = open(pidfile, 'w') pfd.write("%d\n" % os.getpid()) pfd.close() - except IOError, e: - raise Exception, "Could not write pid to file %s - %s [%d]" % (pidfile, e.strerror, e.errno) + except IOError as e: + raise Exception("Could not write pid to file %s - %s [%d]" % (pidfile, e.strerror, e.errno)) def 
handle_script_pidfile(scriptpidfile): scriptpid = get_pid_from_file(scriptpidfile) @@ -194,7 +196,7 @@ def handle_script_pidfile(scriptpidfile): if scriptpid and is_proc_alive(scriptpid): # already running if debug: - print "Script is already running: process id %d" % scriptpid + print("Script is already running: process id %d" % scriptpid) return False else: # either process is not running or no file @@ -210,15 +212,15 @@ def read_and_process_line(logf, plgfuncs): try: line = logf.readline() readcompleted = True # read completed - except IOError, e: + except IOError as e: if e.errno == errno.EINTR: continue # read was interrupted, try again else: # hard error - raise Exception, "%s [%d]" % (e.strerror, e.errno) + raise Exception("%s [%d]" % (e.strerror, e.errno)) if line: # read something for plgfunc in plgfuncs: if not plgfunc(line): - print "Aborting processing due to function %s.%s" % (plgfunc.__module__, plgfunc.__name__) + print("Aborting processing due to function %s.%s" % (plgfunc.__module__, plgfunc.__name__)) finish() # this will exit the process done = True break @@ -281,28 +283,28 @@ if options.scriptpidfile: serverpid = options.serverpid if serverpid: if not is_proc_alive(serverpid): - print "Server pid [%d] is not alive - exiting" % serverpid + print("Server pid [%d] is not alive - exiting" % serverpid) sys.exit(1) try: if os.stat(logfname).st_mode & S_IFIFO: if debug: - print "Using existing log pipe", logfname + print("Using existing log pipe", logfname) else: - print "Error:", logfname, "exists and is not a log pipe" - print "use a filename other than", logfname + print("Error:", logfname, "exists and is not a log pipe") + print("use a filename other than", logfname) sys.exit(1) -except OSError, e: +except OSError as e: if e.errno == errno.ENOENT: if debug: - print "Creating log pipe", logfname + print("Creating log pipe", logfname) os.mkfifo(logfname) - os.chmod(logfname, 0600) + os.chmod(logfname, 0o600) else: - raise Exception, "%s [%d]" % 
(e.strerror, e.errno) + raise Exception("%s [%d]" % (e.strerror, e.errno)) if debug: - print "Listening to log pipe", logfname, "number of lines", maxlines + print("Listening to log pipe", logfname, "number of lines", maxlines) # set up our signal handlers signal.signal(signal.SIGHUP, sighandler) @@ -333,14 +335,14 @@ while not done: logf = open_pipe(logfname) # if we get here, logf is not None if debug: - print "opened pipe", logf + print("opened pipe", logf) if timerisset: # cancel the timer - the open succeeded timerisset = False signal.setitimer(signal.ITIMER_REAL, 0) if debug: - print "cancelled startup timer" + print("cancelled startup timer") lines = 0 # read and process the next line in the pipe @@ -352,11 +354,11 @@ while not done: # the other end of the pipe closed - we close our end too if debug: - print "read", lines, "lines" + print("read", lines, "lines") logf.close() logf = None if debug: - print "closed log pipe", logfname + print("closed log pipe", logfname) if not serverpid and options.serverpidfile: # see if the server has written its server pid file yet @@ -368,7 +370,7 @@ while not done: if serverpid and not is_proc_alive(serverpid): done = True if debug: - print "server pid", serverpid, "exited - script exiting" + print("server pid", serverpid, "exited - script exiting") if neverdone: done = False @@ -387,12 +389,12 @@ while not done: signal.setitimer(signal.ITIMER_REAL, 0.25) timerisset = True if debug: - print "set startup timer - see if server is really shut down" + print("set startup timer - see if server is really shut down") else: # we read something # pipe closed - usually when server shuts down done = True if not done and debug: - print "log pipe", logfname, "closed - reopening - read", totallines, "total lines" + print("log pipe", logfname, "closed - reopening - read", totallines, "total lines") finish() diff --git a/ldap/admin/src/scripts/failedbinds.py b/ldap/admin/src/scripts/failedbinds.py index 8afe0ff..23a7bea 100644 --- 
a/ldap/admin/src/scripts/failedbinds.py +++ b/ldap/admin/src/scripts/failedbinds.py @@ -1,4 +1,5 @@ import re +import sys import os, os.path # regex that matches a BIND request line @@ -91,12 +92,15 @@ def pre(plgargs): global logf logfile = plgargs.get('logfile', None) if not logfile: - print "Error: missing required argument failedbinds.logfile" + print("Error: missing required argument failedbinds.logfile") return False needchmod = False if not os.path.isfile(logfile): needchmod = True - logf = open(logfile, 'a', 0) # 0 for unbuffered output - if needchmod: os.chmod(logfile, 0600) + if sys.version_info < (3, 0): + logf = open(logfile, 'a', 0) # 0 for unbuffered output + else: + logf = open(logfile, 'a') + if needchmod: os.chmod(logfile, 0o600) return True def post(): @@ -153,6 +157,7 @@ def plugin(line): logmsg = conn.addreq(timestamp, opnum, dn, method, mech) if logmsg: logf.write(logmsg + "\n") + logf.flush() return True # is this a RESULT line? @@ -164,6 +169,7 @@ def plugin(line): logmsg = conn.addres(timestamp, opnum, errnum) if logmsg: logf.write(logmsg + "\n") + logf.flush() return True return True # no match diff --git a/ldap/admin/src/scripts/logregex.py b/ldap/admin/src/scripts/logregex.py index 7537953..8b1f87f 100644 --- a/ldap/admin/src/scripts/logregex.py +++ b/ldap/admin/src/scripts/logregex.py @@ -10,7 +10,7 @@ def pre(plgargs): global regex_regex_ary regexary = plgargs.get('regex', None) if not regexary: - print "Error: missing required argument logregex.regex" + print("Error: missing required argument logregex.regex") return False if isinstance(regexary,list): regex_regex_ary = [re.compile(xx) for xx in regexary]

8 years, 11 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/cl5_api.c | 447 ++++++++++++++-- ldap/servers/plugins/replication/cl5_api.h | 5 ldap/servers/plugins/replication/repl5_replica_config.c | 44 - 3 files changed, 430 insertions(+), 66 deletions(-) New commits: commit 9e4cf12cfbfde0761325b75c3fd5a8b39223760a Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Wed Jul 8 11:48:27 2015 -0400 Ticket 48208 - CleanAllRUV should completely purge changelog Bug Description: After cleanAllRUV finishes, the changelog still contains entries from the cleaned rid. Under certain conditions this can allow the RUV to get polluted again, and the ruv element will be missing the replica url. Fix Description: At the end of the cleaning task, fire off a thread to completely purge the changelog of all entries containing the cleaned rid. Also, improved the cleanAllRUV task when dealing with a server shutdown - previously if the timing is right the task can "delay/hang" the shutdown process. https://fedorahosted.org/389/ticket/48208 Reviewed by: nhosoi(Thanks!) 
(cherry picked from commit ff1c34538b0600259dba4801da2b2f0993fa5404) diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c index a10c3ac..ae23353 100644 --- a/ldap/servers/plugins/replication/cl5_api.c +++ b/ldap/servers/plugins/replication/cl5_api.c @@ -319,14 +319,17 @@ static void _cl5TrimCleanup (); static int _cl5TrimMain (void *param); static void _cl5DoTrimming (ReplicaId rid); static void _cl5CompactDBs(); -static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid); +static void _cl5PurgeRID(Object *obj, ReplicaId cleaned_rid); +static int _cl5PurgeGetFirstEntry (Object *obj, CL5Entry *entry, void **iterator, DB_TXN *txnid, int rid, DBT *key); +static int _cl5PurgeGetNextEntry (CL5Entry *entry, void *iterator, DBT *key); +static void _cl5TrimFile (Object *obj, long *numToTrim); static PRBool _cl5CanTrim (time_t time, long *numToTrim); static int _cl5ReadRUV (const char *replGen, Object *obj, PRBool purge); static int _cl5WriteRUV (CL5DBFile *file, PRBool purge); static int _cl5ConstructRUV (const char *replGen, Object *obj, PRBool purge); static int _cl5UpdateRUV (Object *obj, CSN *csn, PRBool newReplica, PRBool purge); static int _cl5GetRUV2Purge2 (Object *fileObj, RUV **ruv); -void trigger_cl_trimming_thread(void *rid); +void trigger_cl_purging_thread(void *rid); /* bakup/recovery, import/export */ static int _cl5LDIF2Operation (char *ldifEntry, slapi_operation_parameters *op, @@ -3470,9 +3473,17 @@ static void _cl5DoTrimming (ReplicaId rid) trimmed more often than other. We might have to fix that by, for example, randomizing starting point */ obj = objset_first_obj (s_cl5Desc.dbFiles); - while (obj && _cl5CanTrim ((time_t)0, &numToTrim)) + while (obj && (_cl5CanTrim ((time_t)0, &numToTrim) || rid)) { - _cl5TrimFile (obj, &numToTrim, rid); + if (rid){ + /* + * We are cleaning an invalid rid, and need to strip it + * from the changelog. 
+ */ + _cl5PurgeRID (obj, rid); + } else { + _cl5TrimFile (obj, &numToTrim); + } obj = objset_next_obj (s_cl5Desc.dbFiles, obj); } @@ -3549,12 +3560,351 @@ bail: return; } +/* + * If the rid is not set it is the very first iteration of the changelog. + * If the rid is set, we are doing another pass, and we have a key as our + * starting point. + */ +static int +_cl5PurgeGetFirstEntry(Object *obj, CL5Entry *entry, void **iterator, DB_TXN *txnid, int rid, DBT *key) +{ + DBC *cursor = NULL; + DBT data = {0}; + CL5Iterator *it; + CL5DBFile *file; + int rc; + + file = (CL5DBFile*)object_get_data (obj); + + /* create cursor */ + rc = file->db->cursor(file->db, txnid, &cursor, 0); + if (rc != 0) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to create cursor; db error - %d %s\n", rc, db_strerror(rc)); + rc = CL5_DB_ERROR; + goto done; + } + + key->flags = DB_DBT_MALLOC; + data.flags = DB_DBT_MALLOC; + while ((rc = cursor->c_get(cursor, key, &data, rid?DB_SET:DB_NEXT)) == 0) + { + /* skip service entries on the first pass (rid == 0)*/ + if (!rid && cl5HelperEntry ((char*)key->data, NULL)) + { + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + continue; + } + + /* format entry */ + rc = cl5DBData2Entry(data.data, data.size, entry); + slapi_ch_free(&(data.data)); + if (rc != 0) + { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to format entry: %d\n", rc); + goto done; + } + + it = (CL5Iterator*)slapi_ch_malloc(sizeof (CL5Iterator)); + it->cursor = cursor; + object_acquire (obj); + it->file = obj; + *(CL5Iterator**)iterator = it; + + return CL5_SUCCESS; + } + + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + + /* walked of the end of the file */ + if (rc == DB_NOTFOUND) + { + rc = CL5_NOTFOUND; + goto done; + } + + /* db error occured while iterating */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to get entry; db error 
- %d %s\n", + rc, db_strerror(rc)); + rc = CL5_DB_ERROR; + +done: + /* + * We didn't success in assigning this cursor to the iterator, + * so we need to free the cursor here. + */ + if (cursor) + cursor->c_close(cursor); + + return rc; +} + +/* + * Get the next entry. If we get a lock error we will restart the process + * starting at the current key. + */ +static int +_cl5PurgeGetNextEntry (CL5Entry *entry, void *iterator, DBT *key) +{ + CL5Iterator *it; + DBT data={0}; + int rc; + + it = (CL5Iterator*) iterator; + + key->flags = DB_DBT_MALLOC; + data.flags = DB_DBT_MALLOC; + while ((rc = it->cursor->c_get(it->cursor, key, &data, DB_NEXT)) == 0) + { + if (cl5HelperEntry ((char*)key->data, NULL)) + { + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + continue; + } + + /* format entry */ + rc = cl5DBData2Entry (data.data, data.size, entry); + slapi_ch_free (&(data.data)); + if (rc != 0) + { + if (rc != CL5_DB_LOCK_ERROR){ + /* Not a lock error, free the key */ + slapi_ch_free(&key->data); + } + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5PurgeGetNextEntry: failed to format entry: %d\n", + rc); + + } + + return rc; + } + slapi_ch_free(&(data.data)); + + /* walked of the end of the file or entry is out of range */ + if (rc == 0 || rc == DB_NOTFOUND){ + slapi_ch_free(&key->data); + return CL5_NOTFOUND; + } + if (rc != CL5_DB_LOCK_ERROR){ + /* Not a lock error, free the key */ + slapi_ch_free(&key->data); + } + + /* cursor operation failed */ + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5PurgeGetNextEntry: failed to get entry; db error - %d %s\n", + rc, db_strerror(rc)); + + return rc; +} + +#define MAX_RETRIES 10 +/* + * _cl5PurgeRID(Object *obj, ReplicaId cleaned_rid) + * + * Clean the entire changelog of updates from the "cleaned rid" via CLEANALLRUV + * Delete entries in batches so we don't consume too many db locks, and we don't + * lockup 
the changelog during the entire purging process using one transaction. + * We save the key from the last iteration so we don't have to start from the + * beginning for each new iteration. + */ +static void +_cl5PurgeRID(Object *obj, ReplicaId cleaned_rid) +{ + slapi_operation_parameters op = {0}; + ReplicaId csn_rid; + CL5Entry entry; + DB_TXN *txnid = NULL; + DBT key = {0}; + void *iterator = NULL; + long totalTrimmed = 0; + long trimmed = 0; + char *starting_key = NULL; + int batch_count = 0; + int db_lock_retry_count = 0; + int first_pass = 1; + int finished = 0; + int rc = 0; + + PR_ASSERT (obj); + entry.op = &op; + + /* + * Keep processing the changelog until we are done, shutting down, or we + * maxed out on the db lock retries. + */ + while (!finished && db_lock_retry_count < MAX_RETRIES && !slapi_is_shutting_down()){ + trimmed = 0; + + /* + * Sleep a bit to allow others to use the changelog - we can't hog the + * changelog for the entire purge. + */ + DS_Sleep(PR_MillisecondsToInterval(100)); + + rc = TXN_BEGIN(s_cl5Desc.dbEnv, NULL, &txnid, 0); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to begin transaction; db error - %d %s. " + "Changelog was not purged of rid(%d)\n", + rc, db_strerror(rc), cleaned_rid); + return; + } + + /* + * Check every changelog entry for the cleaned rid + */ + rc = _cl5PurgeGetFirstEntry(obj, &entry, &iterator, txnid, first_pass?0:cleaned_rid, &key); + first_pass = 0; + while (rc == CL5_SUCCESS && !slapi_is_shutting_down()) { + /* + * Store the new starting key - we need this starting key in case + * we run out of locks and have to start the transaction over. + */ + slapi_ch_free_string(&starting_key); + starting_key = slapi_ch_strdup((char*)key.data); + + if(trimmed == 10000 || (batch_count && trimmed == batch_count)){ + /* + * Break out, and commit these deletes. Do not free the key, + * we need it for the next pass. 
+ */ + cl5_operation_parameters_done (&op); + db_lock_retry_count = 0; /* reset the retry count */ + break; + } + if(op.csn){ + csn_rid = csn_get_replicaid (op.csn); + if (csn_rid == cleaned_rid){ + rc = _cl5CurrentDeleteEntry (iterator); + if (rc != CL5_SUCCESS){ + /* log error */ + cl5_operation_parameters_done (&op); + if (rc == CL5_DB_LOCK_ERROR){ + /* + * Ran out of locks, need to restart the transaction. + * Reduce the the batch count and reset the key to + * the starting point + */ + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeRID: Ran out of db locks deleting entry. " + "Reduce the batch value and restart.\n"); + batch_count = trimmed - 10; + if (batch_count < 10){ + batch_count = 10; + } + trimmed = 0; + slapi_ch_free(&(key.data)); + key.data = starting_key; + starting_key = NULL; + db_lock_retry_count++; + break; + } else { + /* fatal error */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: fatal error (%d)\n", rc); + slapi_ch_free(&(key.data)); + finished = 1; + break; + } + } + trimmed++; + } + } + slapi_ch_free(&(key.data)); + cl5_operation_parameters_done (&op); + + rc = _cl5PurgeGetNextEntry (&entry, iterator, &key); + if (rc == CL5_DB_LOCK_ERROR){ + /* + * Ran out of locks, need to restart the transaction. + * Reduce the the batch count and reset the key to the starting + * point. + */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: Ran out of db locks getting the next entry. 
" + "Reduce the batch value and restart.\n"); + batch_count = trimmed - 10; + if (batch_count < 10){ + batch_count = 10; + } + trimmed = 0; + cl5_operation_parameters_done (&op); + slapi_ch_free(&(key.data)); + key.data = starting_key; + starting_key = NULL; + db_lock_retry_count++; + break; + } + } + + if (rc == CL5_NOTFOUND){ + /* Scanned the entire changelog, we're done */ + finished = 1; + } + + /* Destroy the iterator before we finish with the txn */ + cl5DestroyIterator (iterator); + + /* + * Commit or abort the txn + */ + if (rc == CL5_SUCCESS || rc == CL5_NOTFOUND){ + rc = TXN_COMMIT (txnid, 0); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to commit transaction; db error - %d %s. " + "Changelog was not completely purged of rid (%d)\n", + rc, db_strerror(rc), cleaned_rid); + break; + } else if (finished){ + /* We're done */ + totalTrimmed += trimmed; + break; + } else { + /* Not done yet */ + totalTrimmed += trimmed; + trimmed = 0; + } + } else { + rc = TXN_ABORT (txnid); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to abort transaction; db error - %d %s. " + "Changelog was not completely purged of rid (%d)\n", + rc, db_strerror(rc), cleaned_rid); + } + if (batch_count == 0){ + /* This was not a retry. Fatal error, break out */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: Changelog was not purged of rid (%d)\n", + cleaned_rid); + break; + } + } + } + slapi_ch_free_string(&starting_key); + + slapi_log_error (SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeRID: Removed (%ld entries) that originated from rid (%d)\n", + totalTrimmed, cleaned_rid); +} + /* Note that each file contains changes for a single replicated area. 
trimming algorithm: */ #define CL5_TRIM_MAX_PER_TRANSACTION 10 -static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) +static void _cl5TrimFile (Object *obj, long *numToTrim) { DB_TXN *txnid; RUV *ruv = NULL; @@ -3577,7 +3927,6 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) } entry.op = &op; - while ( !finished && !slapi_is_shutting_down() ) { it = NULL; @@ -3598,7 +3947,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) } finished = _cl5GetFirstEntry (obj, &entry, &it, txnid); - while ( !finished ) + while ( !finished && !slapi_is_shutting_down()) { /* * This change can be trimmed if it exceeds purge @@ -3612,11 +3961,12 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) continue; } csn_rid = csn_get_replicaid (op.csn); + if ( (*numToTrim > 0 || _cl5CanTrim (entry.time, numToTrim)) && ruv_covers_csn_strict (ruv, op.csn) ) { rc = _cl5CurrentDeleteEntry (it); - if ( rc == CL5_SUCCESS && cleaned_rid != csn_rid) + if ( rc == CL5_SUCCESS) { rc = _cl5UpdateRUV (obj, op.csn, PR_FALSE, PR_TRUE); } @@ -3630,7 +3980,6 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) /* The above two functions have logged the error */ abort = PR_TRUE; } - } else { @@ -3687,7 +4036,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) rc = TXN_ABORT (txnid); if (rc != 0) { - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5TrimFile: failed to abort transaction; db error - %d %s\n", rc, db_strerror(rc)); } @@ -3698,7 +4047,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) if (rc != 0) { finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5TrimFile: failed to commit transaction; db error - %d %s\n", rc, db_strerror(rc)); } @@ -4722,9 
+5071,9 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, goto done; } #endif - /* back off */ + /* back off */ interval = PR_MillisecondsToInterval(slapi_rand() % 100); - DS_Sleep(interval); + DS_Sleep(interval); } #if USE_DB_TXN /* begin transaction */ @@ -4770,19 +5119,19 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, } cnt ++; } - + if (rc == 0) /* we successfully added entry */ { #if USE_DB_TXN rc = TXN_COMMIT (txnid, 0); #endif } - else + else { - char s[CSN_STRSIZE]; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + char s[CSN_STRSIZE]; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5WriteOperationTxn: failed to write entry with csn (%s); " - "db error - %d %s\n", csn_as_string(op->csn,PR_FALSE,s), + "db error - %d %s\n", csn_as_string(op->csn,PR_FALSE,s), rc, db_strerror(rc)); #if USE_DB_TXN rc = TXN_ABORT (txnid); @@ -4803,7 +5152,7 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, /* update purge vector if we have not seen any changes from this replica before */ _cl5UpdateRUV (file_obj, op->csn, PR_TRUE, PR_TRUE); - slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name_cl, "cl5WriteOperationTxn: successfully written entry with csn (%s)\n", csnStr); rc = CL5_SUCCESS; done: @@ -4817,7 +5166,7 @@ done: return rc; } -static int _cl5WriteOperation(const char *replName, const char *replGen, +static int _cl5WriteOperation(const char *replName, const char *replGen, const slapi_operation_parameters *op, PRBool local) { return _cl5WriteOperationTxn(replName, replGen, op, local, NULL); @@ -4868,7 +5217,7 @@ static int _cl5GetFirstEntry (Object *obj, CL5Entry *entry, void **iterator, DB_ goto done; } - it = (CL5Iterator*)slapi_ch_malloc (sizeof (CL5Iterator)); + it = (CL5Iterator*)slapi_ch_malloc(sizeof (CL5Iterator)); it->cursor = cursor; object_acquire (obj); it->file = obj; @@ -4943,7 +5292,7 @@ static 
int _cl5GetNextEntry (CL5Entry *entry, void *iterator) slapi_ch_free (&(data.data)); if (rc != 0) { - slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5GetNextEntry: failed to format entry: %d\n", rc); } @@ -4972,38 +5321,42 @@ static int _cl5GetNextEntry (CL5Entry *entry, void *iterator) } /* cursor operation failed */ - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "_cl5GetNextEntry: failed to get entry; db error - %d %s\n", - rc, db_strerror(rc)); + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5GetNextEntry: failed to get entry; db error - %d %s\n", + rc, db_strerror(rc)); - return CL5_DB_ERROR; + return rc; } static int _cl5CurrentDeleteEntry (void *iterator) { int rc; CL5Iterator *it; - CL5DBFile *file; + CL5DBFile *file; - PR_ASSERT (iterator); + PR_ASSERT (iterator); it = (CL5Iterator*)iterator; rc = it->cursor->c_del (it->cursor, 0); if (rc == 0) { - /* decrement entry count */ - file = (CL5DBFile*)object_get_data (it->file); - PR_AtomicDecrement (&file->entryCount); - return CL5_SUCCESS; - } else { - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "_cl5CurrentDeleteEntry failed, err=%d %s\n", - rc, db_strerror(rc)); - /* We don't free(close) the cursor here, as the caller will free it by a call to cl5DestroyIterator */ - /* Freeing it here is a potential bug, as the cursor can't be referenced later once freed */ - return CL5_DB_ERROR; - } + /* decrement entry count */ + file = (CL5DBFile*)object_get_data (it->file); + PR_AtomicDecrement (&file->entryCount); + return CL5_SUCCESS; + } else { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5CurrentDeleteEntry failed, err=%d %s\n", + rc, db_strerror(rc)); + /* + * We don't free(close) the cursor here, as the caller will free it by + * a call to cl5DestroyIterator. Freeing it here is a potential bug, + * as the cursor can't be referenced later once freed. 
+ */ + return rc; + } } static PRBool _cl5IsValidIterator (const CL5Iterator *iterator) @@ -6275,7 +6628,7 @@ static int _cl5ExportFile (PRFileDesc *prFile, Object *obj) slapi_write_buffer (prFile, "\n", strlen("\n")); entry.op = &op; - rc = _cl5GetFirstEntry (obj, &entry, &iterator, NULL); + rc = _cl5GetFirstEntry (obj, &entry, &iterator, NULL); while (rc == CL5_SUCCESS) { rc = _cl5Operation2LDIF (&op, file->replGen, &buff, &len); @@ -6696,16 +7049,16 @@ cl5CleanRUV(ReplicaId rid){ slapi_rwlock_unlock (s_cl5Desc.stLock); } -void trigger_cl_trimming(ReplicaId rid){ +void trigger_cl_purging(ReplicaId rid){ PRThread *trim_tid = NULL; - slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, "trigger_cl_trimming: rid (%d)\n",(int)rid); - trim_tid = PR_CreateThread(PR_USER_THREAD, (VFP)(void*)trigger_cl_trimming_thread, + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, "trigger_cl_purging: rid (%d)\n",(int)rid); + trim_tid = PR_CreateThread(PR_USER_THREAD, (VFP)(void*)trigger_cl_purging_thread, (void *)&rid, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, DEFAULT_THREAD_STACKSIZE); if (NULL == trim_tid){ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "trigger_cl_trimming: failed to create trimming " + "trigger_cl_purging: failed to create trimming " "thread; NSPR error - %d\n", PR_GetError ()); } else { /* need a little time for the thread to get started */ @@ -6714,7 +7067,7 @@ void trigger_cl_trimming(ReplicaId rid){ } void -trigger_cl_trimming_thread(void *arg){ +trigger_cl_purging_thread(void *arg){ ReplicaId rid = *(ReplicaId *)arg; /* make sure we have a change log, and we aren't closing it */ @@ -6723,7 +7076,7 @@ trigger_cl_trimming_thread(void *arg){ } if (CL5_SUCCESS != _cl5AddThread()) { slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "trigger_cl_trimming: failed to increment thread count " + "trigger_cl_purging: failed to increment thread count " "NSPR error - %d\n", PR_GetError ()); } _cl5DoTrimming(rid); diff --git 
a/ldap/servers/plugins/replication/cl5_api.h b/ldap/servers/plugins/replication/cl5_api.h index 5809570..4c3b8e8 100644 --- a/ldap/servers/plugins/replication/cl5_api.h +++ b/ldap/servers/plugins/replication/cl5_api.h @@ -117,6 +117,9 @@ enum CL5_CSN_ERROR, /* CSN API failed */ CL5_RUV_ERROR, /* RUV API failed */ CL5_OBJSET_ERROR, /* namedobjset api failed */ + CL5_DB_LOCK_ERROR, /* bdb returns error 12 when the db runs out of locks, + this var needs to be in slot 12 of the list. + Do not re-order enum above! */ CL5_PURGED_DATA, /* requested data has been purged */ CL5_MISSING_DATA, /* data should be in the changelog, but is missing */ CL5_UNKNOWN_ERROR, /* unclassified error */ @@ -464,6 +467,6 @@ int cl5WriteRUV(); int cl5DeleteRUV(); void cl5CleanRUV(ReplicaId rid); void cl5NotifyCleanup(int rid); -void trigger_cl_trimming(ReplicaId rid); +void trigger_cl_purging(ReplicaId rid); #endif diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index 660b134..faa86b8 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1439,6 +1439,11 @@ replica_execute_cleanruv_task (Object *r, ReplicaId rid, char *returntext /* not */ cl5CleanRUV(rid); + /* + * Now purge the changelog + */ + trigger_cl_purging(rid); + if (rc != RUV_SUCCESS){ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "cleanruv_task: task failed(%d)\n",rc); return LDAP_OPERATIONS_ERROR; @@ -1837,7 +1842,7 @@ replica_cleanallruv_thread(void *arg) /* no agmts, just clean this replica */ break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -1919,13 +1924,15 @@ replica_cleanallruv_thread(void *arg) break; } /* - * need to sleep 
between passes + * Need to sleep between passes unless we are shutting down */ - cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " - "retrying in %d seconds", interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if (!slapi_is_shutting_down()){ + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " + "retrying in %d seconds", interval); + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; @@ -1936,10 +1943,9 @@ replica_cleanallruv_thread(void *arg) done: /* - * If the replicas are cleaned, release the rid, and trim the changelog + * If the replicas are cleaned, release the rid */ if(!aborted){ - trigger_cl_trimming(data->rid); delete_cleaned_rid_config(data); /* make sure all the replicas have been "pre_cleaned" before finishing */ check_replicas_are_done_cleaning(data); @@ -1949,7 +1955,7 @@ done: /* * Shutdown or abort */ - if(!is_task_aborted(data->rid)){ + if(!is_task_aborted(data->rid) || slapi_is_shutting_down()){ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Server shutting down. 
Process will resume at server startup"); } else { cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid); @@ -2184,7 +2190,7 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) not_all_caughtup = 0; break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -2242,7 +2248,7 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) not_all_alive = 0; break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (replica, agmt_obj); @@ -3022,12 +3028,14 @@ replica_abort_task_thread(void *arg) break; } /* - * need to sleep between passes + * Need to sleep between passes. 
unless we are shutting down */ - cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if (!slapi_is_shutting_down()){ + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; @@ -3045,7 +3053,7 @@ done: * Wait for this server to stop its cleanallruv task(which removes the rid from the cleaned list) */ cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort..."); - while(is_cleaned_rid(data->rid)){ + while(is_cleaned_rid(data->rid) && !slapi_is_shutting_down()){ DS_Sleep(PR_SecondsToInterval(1)); count++; if(count == 60){ /* it should not take this long */

8 years, 11 months

1
0
0 / 0

ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/cl5_api.c | 447 ++++++++++++++-- ldap/servers/plugins/replication/cl5_api.h | 5 ldap/servers/plugins/replication/repl5_replica_config.c | 44 - 3 files changed, 430 insertions(+), 66 deletions(-) New commits: commit ff1c34538b0600259dba4801da2b2f0993fa5404 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Wed Jul 8 11:48:27 2015 -0400 Ticket 48208 - CleanAllRUV should completely purge changelog Bug Description: After cleanAllRUV finishes, the changelog still contains entries from the cleaned rid. Under certain conditions this can allow the RUV to get polluted again, and the ruv element will be missing the replica url. Fix Description: At the end of the cleaning task, fire off a thread to completely purge the changelog of all entries containing the cleaned rid. Also, improved the cleanAllRUV task when dealing with a server shutdown - previously, if the timing was right, the task could "delay/hang" the shutdown process. https://fedorahosted.org/389/ticket/48208 Reviewed by: nhosoi(Thanks!) 
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c index a10c3ac..ae23353 100644 --- a/ldap/servers/plugins/replication/cl5_api.c +++ b/ldap/servers/plugins/replication/cl5_api.c @@ -319,14 +319,17 @@ static void _cl5TrimCleanup (); static int _cl5TrimMain (void *param); static void _cl5DoTrimming (ReplicaId rid); static void _cl5CompactDBs(); -static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid); +static void _cl5PurgeRID(Object *obj, ReplicaId cleaned_rid); +static int _cl5PurgeGetFirstEntry (Object *obj, CL5Entry *entry, void **iterator, DB_TXN *txnid, int rid, DBT *key); +static int _cl5PurgeGetNextEntry (CL5Entry *entry, void *iterator, DBT *key); +static void _cl5TrimFile (Object *obj, long *numToTrim); static PRBool _cl5CanTrim (time_t time, long *numToTrim); static int _cl5ReadRUV (const char *replGen, Object *obj, PRBool purge); static int _cl5WriteRUV (CL5DBFile *file, PRBool purge); static int _cl5ConstructRUV (const char *replGen, Object *obj, PRBool purge); static int _cl5UpdateRUV (Object *obj, CSN *csn, PRBool newReplica, PRBool purge); static int _cl5GetRUV2Purge2 (Object *fileObj, RUV **ruv); -void trigger_cl_trimming_thread(void *rid); +void trigger_cl_purging_thread(void *rid); /* bakup/recovery, import/export */ static int _cl5LDIF2Operation (char *ldifEntry, slapi_operation_parameters *op, @@ -3470,9 +3473,17 @@ static void _cl5DoTrimming (ReplicaId rid) trimmed more often than other. We might have to fix that by, for example, randomizing starting point */ obj = objset_first_obj (s_cl5Desc.dbFiles); - while (obj && _cl5CanTrim ((time_t)0, &numToTrim)) + while (obj && (_cl5CanTrim ((time_t)0, &numToTrim) || rid)) { - _cl5TrimFile (obj, &numToTrim, rid); + if (rid){ + /* + * We are cleaning an invalid rid, and need to strip it + * from the changelog. 
+ */ + _cl5PurgeRID (obj, rid); + } else { + _cl5TrimFile (obj, &numToTrim); + } obj = objset_next_obj (s_cl5Desc.dbFiles, obj); } @@ -3549,12 +3560,351 @@ bail: return; } +/* + * If the rid is not set it is the very first iteration of the changelog. + * If the rid is set, we are doing another pass, and we have a key as our + * starting point. + */ +static int +_cl5PurgeGetFirstEntry(Object *obj, CL5Entry *entry, void **iterator, DB_TXN *txnid, int rid, DBT *key) +{ + DBC *cursor = NULL; + DBT data = {0}; + CL5Iterator *it; + CL5DBFile *file; + int rc; + + file = (CL5DBFile*)object_get_data (obj); + + /* create cursor */ + rc = file->db->cursor(file->db, txnid, &cursor, 0); + if (rc != 0) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to create cursor; db error - %d %s\n", rc, db_strerror(rc)); + rc = CL5_DB_ERROR; + goto done; + } + + key->flags = DB_DBT_MALLOC; + data.flags = DB_DBT_MALLOC; + while ((rc = cursor->c_get(cursor, key, &data, rid?DB_SET:DB_NEXT)) == 0) + { + /* skip service entries on the first pass (rid == 0)*/ + if (!rid && cl5HelperEntry ((char*)key->data, NULL)) + { + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + continue; + } + + /* format entry */ + rc = cl5DBData2Entry(data.data, data.size, entry); + slapi_ch_free(&(data.data)); + if (rc != 0) + { + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to format entry: %d\n", rc); + goto done; + } + + it = (CL5Iterator*)slapi_ch_malloc(sizeof (CL5Iterator)); + it->cursor = cursor; + object_acquire (obj); + it->file = obj; + *(CL5Iterator**)iterator = it; + + return CL5_SUCCESS; + } + + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + + /* walked of the end of the file */ + if (rc == DB_NOTFOUND) + { + rc = CL5_NOTFOUND; + goto done; + } + + /* db error occured while iterating */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeGetFirstEntry: failed to get entry; db error 
- %d %s\n", + rc, db_strerror(rc)); + rc = CL5_DB_ERROR; + +done: + /* + * We didn't success in assigning this cursor to the iterator, + * so we need to free the cursor here. + */ + if (cursor) + cursor->c_close(cursor); + + return rc; +} + +/* + * Get the next entry. If we get a lock error we will restart the process + * starting at the current key. + */ +static int +_cl5PurgeGetNextEntry (CL5Entry *entry, void *iterator, DBT *key) +{ + CL5Iterator *it; + DBT data={0}; + int rc; + + it = (CL5Iterator*) iterator; + + key->flags = DB_DBT_MALLOC; + data.flags = DB_DBT_MALLOC; + while ((rc = it->cursor->c_get(it->cursor, key, &data, DB_NEXT)) == 0) + { + if (cl5HelperEntry ((char*)key->data, NULL)) + { + slapi_ch_free(&key->data); + slapi_ch_free(&(data.data)); + continue; + } + + /* format entry */ + rc = cl5DBData2Entry (data.data, data.size, entry); + slapi_ch_free (&(data.data)); + if (rc != 0) + { + if (rc != CL5_DB_LOCK_ERROR){ + /* Not a lock error, free the key */ + slapi_ch_free(&key->data); + } + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5PurgeGetNextEntry: failed to format entry: %d\n", + rc); + + } + + return rc; + } + slapi_ch_free(&(data.data)); + + /* walked of the end of the file or entry is out of range */ + if (rc == 0 || rc == DB_NOTFOUND){ + slapi_ch_free(&key->data); + return CL5_NOTFOUND; + } + if (rc != CL5_DB_LOCK_ERROR){ + /* Not a lock error, free the key */ + slapi_ch_free(&key->data); + } + + /* cursor operation failed */ + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5PurgeGetNextEntry: failed to get entry; db error - %d %s\n", + rc, db_strerror(rc)); + + return rc; +} + +#define MAX_RETRIES 10 +/* + * _cl5PurgeRID(Object *obj, ReplicaId cleaned_rid) + * + * Clean the entire changelog of updates from the "cleaned rid" via CLEANALLRUV + * Delete entries in batches so we don't consume too many db locks, and we don't + * lockup 
the changelog during the entire purging process using one transaction. + * We save the key from the last iteration so we don't have to start from the + * beginning for each new iteration. + */ +static void +_cl5PurgeRID(Object *obj, ReplicaId cleaned_rid) +{ + slapi_operation_parameters op = {0}; + ReplicaId csn_rid; + CL5Entry entry; + DB_TXN *txnid = NULL; + DBT key = {0}; + void *iterator = NULL; + long totalTrimmed = 0; + long trimmed = 0; + char *starting_key = NULL; + int batch_count = 0; + int db_lock_retry_count = 0; + int first_pass = 1; + int finished = 0; + int rc = 0; + + PR_ASSERT (obj); + entry.op = &op; + + /* + * Keep processing the changelog until we are done, shutting down, or we + * maxed out on the db lock retries. + */ + while (!finished && db_lock_retry_count < MAX_RETRIES && !slapi_is_shutting_down()){ + trimmed = 0; + + /* + * Sleep a bit to allow others to use the changelog - we can't hog the + * changelog for the entire purge. + */ + DS_Sleep(PR_MillisecondsToInterval(100)); + + rc = TXN_BEGIN(s_cl5Desc.dbEnv, NULL, &txnid, 0); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to begin transaction; db error - %d %s. " + "Changelog was not purged of rid(%d)\n", + rc, db_strerror(rc), cleaned_rid); + return; + } + + /* + * Check every changelog entry for the cleaned rid + */ + rc = _cl5PurgeGetFirstEntry(obj, &entry, &iterator, txnid, first_pass?0:cleaned_rid, &key); + first_pass = 0; + while (rc == CL5_SUCCESS && !slapi_is_shutting_down()) { + /* + * Store the new starting key - we need this starting key in case + * we run out of locks and have to start the transaction over. + */ + slapi_ch_free_string(&starting_key); + starting_key = slapi_ch_strdup((char*)key.data); + + if(trimmed == 10000 || (batch_count && trimmed == batch_count)){ + /* + * Break out, and commit these deletes. Do not free the key, + * we need it for the next pass. 
+ */ + cl5_operation_parameters_done (&op); + db_lock_retry_count = 0; /* reset the retry count */ + break; + } + if(op.csn){ + csn_rid = csn_get_replicaid (op.csn); + if (csn_rid == cleaned_rid){ + rc = _cl5CurrentDeleteEntry (iterator); + if (rc != CL5_SUCCESS){ + /* log error */ + cl5_operation_parameters_done (&op); + if (rc == CL5_DB_LOCK_ERROR){ + /* + * Ran out of locks, need to restart the transaction. + * Reduce the the batch count and reset the key to + * the starting point + */ + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeRID: Ran out of db locks deleting entry. " + "Reduce the batch value and restart.\n"); + batch_count = trimmed - 10; + if (batch_count < 10){ + batch_count = 10; + } + trimmed = 0; + slapi_ch_free(&(key.data)); + key.data = starting_key; + starting_key = NULL; + db_lock_retry_count++; + break; + } else { + /* fatal error */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: fatal error (%d)\n", rc); + slapi_ch_free(&(key.data)); + finished = 1; + break; + } + } + trimmed++; + } + } + slapi_ch_free(&(key.data)); + cl5_operation_parameters_done (&op); + + rc = _cl5PurgeGetNextEntry (&entry, iterator, &key); + if (rc == CL5_DB_LOCK_ERROR){ + /* + * Ran out of locks, need to restart the transaction. + * Reduce the the batch count and reset the key to the starting + * point. + */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: Ran out of db locks getting the next entry. 
" + "Reduce the batch value and restart.\n"); + batch_count = trimmed - 10; + if (batch_count < 10){ + batch_count = 10; + } + trimmed = 0; + cl5_operation_parameters_done (&op); + slapi_ch_free(&(key.data)); + key.data = starting_key; + starting_key = NULL; + db_lock_retry_count++; + break; + } + } + + if (rc == CL5_NOTFOUND){ + /* Scanned the entire changelog, we're done */ + finished = 1; + } + + /* Destroy the iterator before we finish with the txn */ + cl5DestroyIterator (iterator); + + /* + * Commit or abort the txn + */ + if (rc == CL5_SUCCESS || rc == CL5_NOTFOUND){ + rc = TXN_COMMIT (txnid, 0); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to commit transaction; db error - %d %s. " + "Changelog was not completely purged of rid (%d)\n", + rc, db_strerror(rc), cleaned_rid); + break; + } else if (finished){ + /* We're done */ + totalTrimmed += trimmed; + break; + } else { + /* Not done yet */ + totalTrimmed += trimmed; + trimmed = 0; + } + } else { + rc = TXN_ABORT (txnid); + if (rc != 0){ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: failed to abort transaction; db error - %d %s. " + "Changelog was not completely purged of rid (%d)\n", + rc, db_strerror(rc), cleaned_rid); + } + if (batch_count == 0){ + /* This was not a retry. Fatal error, break out */ + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5PurgeRID: Changelog was not purged of rid (%d)\n", + cleaned_rid); + break; + } + } + } + slapi_ch_free_string(&starting_key); + + slapi_log_error (SLAPI_LOG_REPL, repl_plugin_name_cl, + "_cl5PurgeRID: Removed (%ld entries) that originated from rid (%d)\n", + totalTrimmed, cleaned_rid); +} + /* Note that each file contains changes for a single replicated area. 
trimming algorithm: */ #define CL5_TRIM_MAX_PER_TRANSACTION 10 -static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) +static void _cl5TrimFile (Object *obj, long *numToTrim) { DB_TXN *txnid; RUV *ruv = NULL; @@ -3577,7 +3927,6 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) } entry.op = &op; - while ( !finished && !slapi_is_shutting_down() ) { it = NULL; @@ -3598,7 +3947,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) } finished = _cl5GetFirstEntry (obj, &entry, &it, txnid); - while ( !finished ) + while ( !finished && !slapi_is_shutting_down()) { /* * This change can be trimmed if it exceeds purge @@ -3612,11 +3961,12 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) continue; } csn_rid = csn_get_replicaid (op.csn); + if ( (*numToTrim > 0 || _cl5CanTrim (entry.time, numToTrim)) && ruv_covers_csn_strict (ruv, op.csn) ) { rc = _cl5CurrentDeleteEntry (it); - if ( rc == CL5_SUCCESS && cleaned_rid != csn_rid) + if ( rc == CL5_SUCCESS) { rc = _cl5UpdateRUV (obj, op.csn, PR_FALSE, PR_TRUE); } @@ -3630,7 +3980,6 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) /* The above two functions have logged the error */ abort = PR_TRUE; } - } else { @@ -3687,7 +4036,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) rc = TXN_ABORT (txnid); if (rc != 0) { - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5TrimFile: failed to abort transaction; db error - %d %s\n", rc, db_strerror(rc)); } @@ -3698,7 +4047,7 @@ static void _cl5TrimFile (Object *obj, long *numToTrim, ReplicaId cleaned_rid) if (rc != 0) { finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5TrimFile: failed to commit transaction; db error - %d %s\n", rc, db_strerror(rc)); } @@ -4722,9 
+5071,9 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, goto done; } #endif - /* back off */ + /* back off */ interval = PR_MillisecondsToInterval(slapi_rand() % 100); - DS_Sleep(interval); + DS_Sleep(interval); } #if USE_DB_TXN /* begin transaction */ @@ -4770,19 +5119,19 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, } cnt ++; } - + if (rc == 0) /* we successfully added entry */ { #if USE_DB_TXN rc = TXN_COMMIT (txnid, 0); #endif } - else + else { - char s[CSN_STRSIZE]; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + char s[CSN_STRSIZE]; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5WriteOperationTxn: failed to write entry with csn (%s); " - "db error - %d %s\n", csn_as_string(op->csn,PR_FALSE,s), + "db error - %d %s\n", csn_as_string(op->csn,PR_FALSE,s), rc, db_strerror(rc)); #if USE_DB_TXN rc = TXN_ABORT (txnid); @@ -4803,7 +5152,7 @@ static int _cl5WriteOperationTxn(const char *replName, const char *replGen, /* update purge vector if we have not seen any changes from this replica before */ _cl5UpdateRUV (file_obj, op->csn, PR_TRUE, PR_TRUE); - slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_PLUGIN, repl_plugin_name_cl, "cl5WriteOperationTxn: successfully written entry with csn (%s)\n", csnStr); rc = CL5_SUCCESS; done: @@ -4817,7 +5166,7 @@ done: return rc; } -static int _cl5WriteOperation(const char *replName, const char *replGen, +static int _cl5WriteOperation(const char *replName, const char *replGen, const slapi_operation_parameters *op, PRBool local) { return _cl5WriteOperationTxn(replName, replGen, op, local, NULL); @@ -4868,7 +5217,7 @@ static int _cl5GetFirstEntry (Object *obj, CL5Entry *entry, void **iterator, DB_ goto done; } - it = (CL5Iterator*)slapi_ch_malloc (sizeof (CL5Iterator)); + it = (CL5Iterator*)slapi_ch_malloc(sizeof (CL5Iterator)); it->cursor = cursor; object_acquire (obj); it->file = obj; @@ -4943,7 +5292,7 @@ static 
int _cl5GetNextEntry (CL5Entry *entry, void *iterator) slapi_ch_free (&(data.data)); if (rc != 0) { - slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, "_cl5GetNextEntry: failed to format entry: %d\n", rc); } @@ -4972,38 +5321,42 @@ static int _cl5GetNextEntry (CL5Entry *entry, void *iterator) } /* cursor operation failed */ - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "_cl5GetNextEntry: failed to get entry; db error - %d %s\n", - rc, db_strerror(rc)); + slapi_log_error(rc == CL5_DB_LOCK_ERROR?SLAPI_LOG_REPL:SLAPI_LOG_FATAL, + repl_plugin_name_cl, + "_cl5GetNextEntry: failed to get entry; db error - %d %s\n", + rc, db_strerror(rc)); - return CL5_DB_ERROR; + return rc; } static int _cl5CurrentDeleteEntry (void *iterator) { int rc; CL5Iterator *it; - CL5DBFile *file; + CL5DBFile *file; - PR_ASSERT (iterator); + PR_ASSERT (iterator); it = (CL5Iterator*)iterator; rc = it->cursor->c_del (it->cursor, 0); if (rc == 0) { - /* decrement entry count */ - file = (CL5DBFile*)object_get_data (it->file); - PR_AtomicDecrement (&file->entryCount); - return CL5_SUCCESS; - } else { - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "_cl5CurrentDeleteEntry failed, err=%d %s\n", - rc, db_strerror(rc)); - /* We don't free(close) the cursor here, as the caller will free it by a call to cl5DestroyIterator */ - /* Freeing it here is a potential bug, as the cursor can't be referenced later once freed */ - return CL5_DB_ERROR; - } + /* decrement entry count */ + file = (CL5DBFile*)object_get_data (it->file); + PR_AtomicDecrement (&file->entryCount); + return CL5_SUCCESS; + } else { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "_cl5CurrentDeleteEntry failed, err=%d %s\n", + rc, db_strerror(rc)); + /* + * We don't free(close) the cursor here, as the caller will free it by + * a call to cl5DestroyIterator. Freeing it here is a potential bug, + * as the cursor can't be referenced later once freed. 
+ */ + return rc; + } } static PRBool _cl5IsValidIterator (const CL5Iterator *iterator) @@ -6275,7 +6628,7 @@ static int _cl5ExportFile (PRFileDesc *prFile, Object *obj) slapi_write_buffer (prFile, "\n", strlen("\n")); entry.op = &op; - rc = _cl5GetFirstEntry (obj, &entry, &iterator, NULL); + rc = _cl5GetFirstEntry (obj, &entry, &iterator, NULL); while (rc == CL5_SUCCESS) { rc = _cl5Operation2LDIF (&op, file->replGen, &buff, &len); @@ -6696,16 +7049,16 @@ cl5CleanRUV(ReplicaId rid){ slapi_rwlock_unlock (s_cl5Desc.stLock); } -void trigger_cl_trimming(ReplicaId rid){ +void trigger_cl_purging(ReplicaId rid){ PRThread *trim_tid = NULL; - slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, "trigger_cl_trimming: rid (%d)\n",(int)rid); - trim_tid = PR_CreateThread(PR_USER_THREAD, (VFP)(void*)trigger_cl_trimming_thread, + slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, "trigger_cl_purging: rid (%d)\n",(int)rid); + trim_tid = PR_CreateThread(PR_USER_THREAD, (VFP)(void*)trigger_cl_purging_thread, (void *)&rid, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, DEFAULT_THREAD_STACKSIZE); if (NULL == trim_tid){ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "trigger_cl_trimming: failed to create trimming " + "trigger_cl_purging: failed to create trimming " "thread; NSPR error - %d\n", PR_GetError ()); } else { /* need a little time for the thread to get started */ @@ -6714,7 +7067,7 @@ void trigger_cl_trimming(ReplicaId rid){ } void -trigger_cl_trimming_thread(void *arg){ +trigger_cl_purging_thread(void *arg){ ReplicaId rid = *(ReplicaId *)arg; /* make sure we have a change log, and we aren't closing it */ @@ -6723,7 +7076,7 @@ trigger_cl_trimming_thread(void *arg){ } if (CL5_SUCCESS != _cl5AddThread()) { slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, - "trigger_cl_trimming: failed to increment thread count " + "trigger_cl_purging: failed to increment thread count " "NSPR error - %d\n", PR_GetError ()); } _cl5DoTrimming(rid); diff --git 
a/ldap/servers/plugins/replication/cl5_api.h b/ldap/servers/plugins/replication/cl5_api.h index 5809570..4c3b8e8 100644 --- a/ldap/servers/plugins/replication/cl5_api.h +++ b/ldap/servers/plugins/replication/cl5_api.h @@ -117,6 +117,9 @@ enum CL5_CSN_ERROR, /* CSN API failed */ CL5_RUV_ERROR, /* RUV API failed */ CL5_OBJSET_ERROR, /* namedobjset api failed */ + CL5_DB_LOCK_ERROR, /* bdb returns error 12 when the db runs out of locks, + this var needs to be in slot 12 of the list. + Do not re-order enum above! */ CL5_PURGED_DATA, /* requested data has been purged */ CL5_MISSING_DATA, /* data should be in the changelog, but is missing */ CL5_UNKNOWN_ERROR, /* unclassified error */ @@ -464,6 +467,6 @@ int cl5WriteRUV(); int cl5DeleteRUV(); void cl5CleanRUV(ReplicaId rid); void cl5NotifyCleanup(int rid); -void trigger_cl_trimming(ReplicaId rid); +void trigger_cl_purging(ReplicaId rid); #endif diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index 660b134..faa86b8 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1439,6 +1439,11 @@ replica_execute_cleanruv_task (Object *r, ReplicaId rid, char *returntext /* not */ cl5CleanRUV(rid); + /* + * Now purge the changelog + */ + trigger_cl_purging(rid); + if (rc != RUV_SUCCESS){ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "cleanruv_task: task failed(%d)\n",rc); return LDAP_OPERATIONS_ERROR; @@ -1837,7 +1842,7 @@ replica_cleanallruv_thread(void *arg) /* no agmts, just clean this replica */ break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -1919,13 +1924,15 @@ replica_cleanallruv_thread(void *arg) break; } /* - * need to sleep 
between passes + * Need to sleep between passes unless we are shutting down */ - cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " - "retrying in %d seconds", interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if (!slapi_is_shutting_down()){ + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, "Replicas have not been cleaned yet, " + "retrying in %d seconds", interval); + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; @@ -1936,10 +1943,9 @@ replica_cleanallruv_thread(void *arg) done: /* - * If the replicas are cleaned, release the rid, and trim the changelog + * If the replicas are cleaned, release the rid */ if(!aborted){ - trigger_cl_trimming(data->rid); delete_cleaned_rid_config(data); /* make sure all the replicas have been "pre_cleaned" before finishing */ check_replicas_are_done_cleaning(data); @@ -1949,7 +1955,7 @@ done: /* * Shutdown or abort */ - if(!is_task_aborted(data->rid)){ + if(!is_task_aborted(data->rid) || slapi_is_shutting_down()){ cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Server shutting down. 
Process will resume at server startup"); } else { cleanruv_log(data->task, data->rid, CLEANALLRUV_ID,"Task aborted for rid(%d).",data->rid); @@ -2184,7 +2190,7 @@ check_agmts_are_caught_up(cleanruv_data *data, char *maxcsn) not_all_caughtup = 0; break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (data->replica, agmt_obj); @@ -2242,7 +2248,7 @@ check_agmts_are_alive(Replica *replica, ReplicaId rid, Slapi_Task *task) not_all_alive = 0; break; } - while (agmt_obj){ + while (agmt_obj && !slapi_is_shutting_down()){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ agmt_obj = agmtlist_get_next_agreement_for_replica (replica, agmt_obj); @@ -3022,12 +3028,14 @@ replica_abort_task_thread(void *arg) break; } /* - * need to sleep between passes + * Need to sleep between passes. 
unless we are shutting down */ - cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); - PR_Lock( notify_lock ); - PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); - PR_Unlock( notify_lock ); + if (!slapi_is_shutting_down()){ + cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID,"Retrying in %d seconds",interval); + PR_Lock( notify_lock ); + PR_WaitCondVar( notify_cvar, PR_SecondsToInterval(interval) ); + PR_Unlock( notify_lock ); + } if(interval < 14400){ /* 4 hour max */ interval = interval * 2; @@ -3045,7 +3053,7 @@ done: * Wait for this server to stop its cleanallruv task(which removes the rid from the cleaned list) */ cleanruv_log(data->task, data->rid, ABORT_CLEANALLRUV_ID, "Waiting for CleanAllRUV task to abort..."); - while(is_cleaned_rid(data->rid)){ + while(is_cleaned_rid(data->rid) && !slapi_is_shutting_down()){ DS_Sleep(PR_SecondsToInterval(1)); count++; if(count == 60){ /* it should not take this long */

8 years, 11 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - ldap/servers

by Noriko Hosoi

ldap/servers/slapd/conntable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) New commits: commit a741911c9a5090d78f7a81c475bea3f6593d72ad Author: Noriko Hosoi <nhosoi(a)redhat.com> Date: Tue Jul 7 12:54:38 2015 -0700 Ticket #48203 - Fix coverity issues - 07/07/2015 Description: 1. Defect type: CLANG_WARNING 389-ds-base-1.3.4.0/ldap/servers/slapd/conntable.c:161:11: warning: Access to field 'c_ct' results in a dereference of a null pointer (loaded from variable 'c') Thanks to rmeggins(a)redhat.com for the advice: > If PR_NewLock() returns NULL then the server is severely out of some > resource (like RAM, stack space, etc.) and probably should just exit. https://fedorahosted.org/389/ticket/48203#comment:8 (cherry picked from commit bca0908b1e10ada69cdc051d4aaceda73a940597) diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c index 0364d94..d5b9058 100644 --- a/ldap/servers/slapd/conntable.c +++ b/ldap/servers/slapd/conntable.c @@ -147,7 +147,7 @@ connection_table_get_connection(Connection_Table *ct, int sd) c->c_mutex = NULL; c->c_pdumutex = NULL; LDAPDebug( LDAP_DEBUG_ANY,"PR_NewLock failed\n",0, 0, 0 ); - c= NULL; + exit(1); } } /* Let's make sure there's no cruft left on there from the last time this connection was used. */

8 years, 11 months

1
0
0 / 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

389-commits July 2015