Hi Thierry,
Just to be clear, changelogmaxage was changed to -1 by me after the upgrade and I've
confirmed it is now set to -1.
The reason for me to change the value was because of the deadlock.
Apparently, it did not make much of a difference. It still gets into a deadlock
with the value -1.
BTW. I've sent the whole stacktrace directly to you to avoid the 4000+ lines to
this mailing list.
Here is the stack trace of one of the threads. The one that hangs in trim_changelog
Thread 2 (Thread 0x7f96ede68700 (LWP 2151)):
#0 0x00007f96eaf3939e in pthread_rwlock_wrlock () at /lib64/libpthread.so.0
#1 0x00007f96da9d281f in map_wrlock () at
/usr/lib64/dirsrv/plugins/schemacompat-plugin.so
#2 0x00007f96da9c1e58 in backend_shr_delete_cb.part.5 () at
/usr/lib64/dirsrv/plugins/schemacompat-plugin.so
#3 0x00007f96da9c1fd1 in backend_shr_betxn_post_delete_cb () at
/usr/lib64/dirsrv/plugins/schemacompat-plugin.so
#4 0x00007f96ed7ec688 in plugin_call_func (list=0x55cb03cdd8c0,
operation=operation@entry=563, pb=pb@entry=0x55cb4c670fc0, call_one=call_one@entry=0) at
ldap/servers/slapd/plugin.c:2028
n = <optimized out>
func = 0x7f96da9c1f70 <backend_shr_betxn_post_delete_cb>
rc = <optimized out>
return_value = 0
count = 2
#5 0x00007f96ed7ec943 in plugin_call_list (pb=0x55cb4c670fc0, operation=563,
list=<optimized out>) at ldap/servers/slapd/plugin.c:1972
p = 0x55cb03cb0000
locked = <optimized out>
plugin_list_number = 21
rc = 0
do_op = <optimized out>
#6 0x00007f96ed7ec943 in plugin_call_plugins (pb=pb@entry=0x55cb4c670fc0,
whichfunction=whichfunction@entry=563) at ldap/servers/slapd/plugin.c:442
p = 0x55cb03cb0000
locked = <optimized out>
plugin_list_number = 21
rc = 0
do_op = <optimized out>
#7 0x00007f96dc990def in ldbm_back_delete (pb=0x55cb4c670fc0) at
ldap/servers/slapd/back-ldbm/ldbm_delete.c:1267
be = 0x55cb03c31040
inst = 0x55cb03a8c680
li = 0x55cb03a017c0
e = 0x55cb15499a40
tombstone = 0x0
original_tombstone = 0x0
tmptombstone = 0x0
dn = 0x55cb1742a600 "changenumber=891343,cn=changelog"
txn = {back_txn_txn = 0x55cb2b7b6dc0}
parent_txn = 0x0
retval = 0
msg = <optimized out>
errbuf = 0x0
retry_count = <optimized out>
disk_full = 0
parent_found = <optimized out>
ruv_c_init = 0
parent_modify_c = {old_entry = 0x55cb159982a0, new_entry = 0x55cb19e03030, smods
= 0x55cb47da1ae0, attr_encrypt = 1}
ruv_c = {old_entry = 0x0, new_entry = 0x0, smods = 0x0, attr_encrypt = 0}
rc = 0
ldap_result_code = 0
ldap_result_message = 0x0
sdnp = 0x55cb5fd93bc0
e_uniqueid = 0x0
nscpEntrySDN = {flag = 0 '\000', udn = 0x0, dn = 0x0, ndn = 0x0, ndn_len
= 0}
operation = 0x55cb5ef86000
opcsn = 0x0
is_fixup_operation = 0
is_ruv = 0
is_replicated_operation = 0
is_tombstone_entry = <optimized out>
delete_tombstone_entry = 0
create_tombstone_entry = 0
addr = 0x55cb5ef86100
addordel_flags = 38
entryusn_str = 0x0
orig_entry = 0x0
parentsdn = {flag = 2 '\002', udn = 0x0, dn = 0x55cb516c3580
"cn=changelog", ndn = 0x0, ndn_len = 12}
opreturn = 0
free_delete_existing_entry = 1
not_an_error = 0
parent_switched = 0
myrc = 0
conn_id = 0
tombstone_csn = <optimized out>
deletion_csn_str =
"\247\001\000\000\000\000\000\000\000T\334YHi\365\357\300\017g", <incomplete
sequence \313>
op_id = 0
ep_id = <optimized out>
tomb_ep_id = 0
result_sent = 0
pb_conn = 0x0
parent_op = 1
parent_time = {tv_sec = 183860, tv_nsec = 41743941}
#8 0x00007f96ed79d3bb in op_shared_delete (pb=pb@entry=0x55cb4c670fc0) at
ldap/servers/slapd/delete.c:324
rc = 0
rawdn = 0x55cb5b28de00 "changenumber=891343, cn=changelog"
dn = <optimized out>
be = 0x55cb03c31040
internal_op = 32
sdn = 0x55cb5fd93bc0
operation = 0x55cb5ef86000
referral = 0x0
ecopy = 0x0
errorbuf = "\000\066\062\062Z\nmodifyTime", '\000' <repeats
16 times>, "\071\063\066\062\062Z\nnsUniqueI", '\000' <repeats 16
times>,
"\327\320~\003ڇ\316c!\f=\037$z\215\301;\215\006\065\341\n\310V\300\006\365I\344|x\210\216\f\215\060e\243S\350\000T\334YHi\365\357\016\237V\202\332PG\347\300\070\314\003\313U\000\000\300\070\314\003\313U\000\000\000T\334YHi\365\357\000\000\000\000\000\000\000\000\300\070\314\003\313U\000\000\300\070\314\003\313U\000\000\240<\250\355\226\177\000\000\000\243\002\000\000\000\000\000p|\346\355\226\177\000\000ϙ\r\000\000\000\000\000\312\360"...
err = <optimized out>
proxydn = 0x0
proxystr = 0x0
proxy_err = <optimized out>
errtext = 0x0
#9 0x00007f96ed79d598 in delete_internal_pb (pb=pb@entry=0x55cb4c670fc0) at
ldap/servers/slapd/delete.c:208
controls = 0x0
op = 0x55cb5ef86000
opresult = 0
#10 0x00007f96ed79d813 in slapi_delete_internal_pb (pb=pb@entry=0x55cb4c670fc0) at
ldap/servers/slapd/delete.c:151
#11 0x00007f96daff731e in delete_changerecord (cnum=cnum@entry=891343) at
ldap/servers/plugins/retrocl/retrocl_trim.c:89
pb = 0x55cb4c670fc0
dnbuf = 0x55cb5b28de00 "changenumber=891343, cn=changelog"
delrc = 32662
#12 0x00007f96daff7605 in trim_changelog () at
ldap/servers/plugins/retrocl/retrocl_trim.c:290
did_delete = 0
done = 0
last_in_log = <optimized out>
me = 172800
lt = <optimized out>
ldrc = <optimized out>
now = 1620819600
first_in_log = 891343
num_deleted = 3655
#13 0x00007f96daff7605 in changelog_trim_thread_fn (arg=<optimized out>) at
ldap/servers/plugins/retrocl/retrocl_trim.c:333
#14 0x00007f96eb595c5b in _pt_root () at /lib64/libnspr4.so
#15 0x00007f96eaf35ea5 in start_thread () at /lib64/libpthread.so.0
#16 0x00007f96ea5e19fd in clone () at /lib64/libc.so.6
-- Kees
On 12-05-2021 15:49, Thierry Bordaz wrote:
Hi Kees,
Is changelogmaxage=-1 after the upgrade ?
would you send a full pstack when it hangs ? If pthread_rwlock_wrlock is
trim_changelog then you may hit another flavor of [1] (without known
reason).
regards
thierry
On 5/12/21 2:40 PM, Kees Bakker wrote:
> Sorry to revive an old thread. I'm getting deadlocks again. See below
>
> On 20-04-2020 15:16, thierry bordaz wrote:
>> [...]This is a known bug [1].
>> With the same bug there are two deadlock scenario but only one is
>> fixed (for example in slapi-nis-0.56.4-1 [2]).
>> A fix for the second one is under tests.
>>
>> At the moment I would recommend the workaround [3]. The drawback is a
>> growth of the retroCL database but unless you have a very high rate
>> of update it should not be a concern.
>>
>> [1]
https://bugzilla.redhat.com/show_bug.cgi?id=1751295
>> [2]
https://koji.fedoraproject.org/koji/buildinfo?buildID=1457771
>> [3]
https://bugzilla.redhat.com/show_bug.cgi?id=1751295#c5
>>
>> best regards
>> thierry
>>
>
> I followed the recommendation to set nsslapd-changelogmaxage to -1.
> The system
> has been running successfully for a year.
>
> Recently I upgraded all packages in this CentOS7 system. Ever since
> that moment
> the server is quite unusable.
>
> [root@linge ~]# gdb -ex 'set confirm off' -ex 'set pagination off'
-ex
> 'thread apply all bt full' -ex 'quit' /usr/sbin/ns-slapd `pidof
> ns-slapd` |& grep '^#0.*lock'
> #0 0x00007f96eaf3939e in pthread_rwlock_wrlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf39184 in pthread_rwlock_rdlock () at
> /lib64/libpthread.so.0
> #0 0x00007f96eaf3939e in pthread_rwlock_wrlock () at
> /lib64/libpthread.so.0
>
> [root@linge ~]# rpm -qa 389\*
> 389-ds-base-libs-1.3.10.2-10.el7_9.x86_64
> 389-ds-base-1.3.10.2-10.el7_9.x86_64
> 389-ds-base-debuginfo-1.3.10.2-10.el7_9.x86_64
>
> [root@linge ~]# rpm -qa slapi\*
> slapi-nis-0.56.5-3.el7_9.x86_64
>
> [root@linge ~]# rpm -qa centos-release
> centos-release-7-9.2009.1.el7.centos.x86_64
>
> Are there any new hints to avoid the deadlock?